Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
222 lines
8.0 KiB
Python
222 lines
8.0 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
CXInsights - Command Line Interface
|
|
|
|
Main entry point for running the analysis pipeline.
|
|
"""
|
|
|
|
import argparse
|
|
import logging
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
# Load environment variables from .env file
|
|
load_dotenv()
|
|
|
|
from src.pipeline import CXInsightsPipeline, PipelineConfig
|
|
|
|
|
|
def setup_logging(verbose: bool = False) -> None:
    """Initialise root logging with a timestamped, column-aligned format.

    Args:
        verbose: When True, log at DEBUG level; otherwise INFO.
    """
    logging.basicConfig(
        level=logging.DEBUG if verbose else logging.INFO,
        format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )
|
|
|
|
|
|
def progress_callback(stage: str, current: int, total: int) -> None:
    """Render a single-line text progress bar on stdout.

    No-op when ``total`` is zero or negative. The bar is redrawn in place
    using a carriage return; a newline is emitted once ``current`` reaches
    ``total`` so subsequent output starts on a fresh line.

    Args:
        stage: Label printed before the bar.
        current: Number of items completed so far.
        total: Total number of items expected.
    """
    if total <= 0:
        return
    width = 30
    done = int(width * current / total)
    bar = "█" * done + "░" * (width - done)
    pct = current / total * 100
    print(f"\r{stage}: [{bar}] {pct:.0f}% ({current}/{total})", end="", flush=True)
    if current == total:
        print()  # New line when complete
|
|
|
|
|
|
def cmd_run(args: argparse.Namespace) -> int:
    """Run the analysis pipeline for a batch of audio files.

    Builds a :class:`PipelineConfig` from CLI arguments, validates the
    inputs, runs the pipeline, and prints a human-readable summary.

    Args:
        args: Parsed arguments from the ``run`` subcommand.

    Returns:
        0 on success, 1 on any error (missing input, pipeline failure,
        or the not-yet-implemented ``--transcripts`` path).
    """
    print("=" * 60)
    print("CXInsights - Call Analysis Pipeline")
    print("=" * 60)

    # Build config, falling back to the project-default directories.
    config = PipelineConfig(
        input_dir=Path(args.input) if args.input else Path("data/audio"),
        output_dir=Path(args.output) if args.output else Path("data/output"),
        checkpoint_dir=Path(args.checkpoint) if args.checkpoint else Path("data/.checkpoints"),
        inference_model=args.model,
        use_compression=not args.no_compression,
        export_formats=args.formats.split(",") if args.formats else ["json", "excel"],
        auto_resume=not args.no_resume,
    )

    print("\nConfiguration:")
    print(f" Input: {config.input_dir}")
    print(f" Output: {config.output_dir}")
    print(f" Model: {config.inference_model}")
    print(f" Compression: {'Enabled' if config.use_compression else 'Disabled'}")
    print(f" Formats: {', '.join(config.export_formats)}")
    print()

    # Check for transcripts
    transcripts_file = Path(args.transcripts) if args.transcripts else None

    if transcripts_file is not None:
        if not transcripts_file.exists():
            # Fail fast: previously a missing transcripts file was silently
            # ignored and the run fell through to a misleading audio error.
            print(f"Error: Transcripts file not found: {transcripts_file}")
            return 1
        print(f"Loading transcripts from: {transcripts_file}")
        # Load transcripts (placeholder - would need actual loading logic)
        print("Note: Transcript loading not fully implemented in CLI")
        return 1

    # Check for audio files; sorted for a deterministic processing order.
    audio_files = sorted(
        list(config.input_dir.glob("*.wav")) + list(config.input_dir.glob("*.mp3"))
    )

    if not audio_files:
        print(f"Error: No audio files found in {config.input_dir}")
        print("Please provide audio files or use --transcripts option")
        return 1

    print(f"Found {len(audio_files)} audio files")

    # Run pipeline; progress output is suppressed in --quiet mode.
    pipeline = CXInsightsPipeline(
        config=config,
        progress_callback=progress_callback if not args.quiet else None,
    )

    try:
        result = pipeline.run(
            batch_id=args.batch_id,
            audio_files=audio_files,
            resume=not args.no_resume,
        )

        print("\n" + "=" * 60)
        print("Pipeline Complete!")
        print("=" * 60)
        print("\nResults:")
        print(f" Total calls: {result.total_calls_processed}")
        print(f" Successful: {result.successful_analyses}")
        print(f" Failed: {result.failed_analyses}")
        print(f" Lost sales: {len(result.lost_sales_frequencies)} drivers")
        print(f" Poor CX: {len(result.poor_cx_frequencies)} drivers")

        if result.rca_tree:
            tree = result.rca_tree
            print(f"\n Top lost sales: {', '.join(tree.top_lost_sales_drivers[:3])}")
            print(f" Top poor CX: {', '.join(tree.top_poor_cx_drivers[:3])}")

        print(f"\nOutput: {config.output_dir / args.batch_id}")

        return 0

    except Exception as e:
        # Lazy %-formatting so the message is only built when emitted.
        logging.error("Pipeline failed: %s", e)
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1
|
|
|
|
|
|
def cmd_status(args: argparse.Namespace) -> int:
    """Print a human-readable status report for a pipeline batch.

    Loads the batch's manifest from the checkpoint directory and prints
    overall status plus a per-stage breakdown with progress counters.

    Args:
        args: Parsed arguments from the ``status`` subcommand.

    Returns:
        0 if the manifest was found and printed, 1 otherwise.
    """
    from src.pipeline.models import PipelineManifest

    checkpoint_dir = Path(args.checkpoint) if args.checkpoint else Path("data/.checkpoints")
    manifest_path = checkpoint_dir / f"pipeline_{args.batch_id}.json"

    if not manifest_path.exists():
        print(f"No pipeline found for batch: {args.batch_id}")
        return 1

    manifest = PipelineManifest.load(manifest_path)

    print(f"\nPipeline Status: {manifest.batch_id}")
    print("=" * 50)
    print(f"Status: {manifest.status.value}")
    print(f"Created: {manifest.created_at}")
    print(f"Total duration: {manifest.total_duration_sec:.1f}s")
    print()

    print("Stages:")
    # Icon lookup hoisted out of the loop; unknown statuses render as "?".
    icons = {
        "pending": "⏳",
        "running": "🔄",
        "completed": "✅",
        "failed": "❌",
        "skipped": "⏭️",
    }
    for stage, stage_manifest in manifest.stages.items():
        status_icon = icons.get(stage_manifest.status.value, "?")
        duration = f"({stage_manifest.duration_sec:.1f}s)" if stage_manifest.duration_sec else ""
        print(f" {status_icon} {stage.value}: {stage_manifest.status.value} {duration}")
        if stage_manifest.processed_items > 0:
            print(f" Processed: {stage_manifest.processed_items}/{stage_manifest.total_items}")

    return 0
|
|
|
|
|
|
def cmd_export(args: argparse.Namespace) -> int:
    """Placeholder for the standalone export command.

    Exporting is currently only available via the ``run`` command's
    ``--formats`` option; this stub always fails.

    Args:
        args: Parsed arguments from the ``export`` subcommand (unused).

    Returns:
        1, always — the command is not implemented yet.
    """
    for line in (
        "Export command - not yet implemented",
        "Use the run command with --formats option instead",
    ):
        print(line)
    return 1
|
|
|
|
|
|
def main() -> int:
    """Parse CLI arguments and dispatch to the selected subcommand.

    Returns:
        The subcommand's exit code, or 0 when no command was given
        (the help text is printed instead).
    """
    parser = argparse.ArgumentParser(
        description="CXInsights - Call Center Analysis Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-q", "--quiet", action="store_true", help="Quiet output (no progress)")

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # --- run: execute the analysis pipeline ---
    run_cmd = subparsers.add_parser("run", help="Run the analysis pipeline")
    run_cmd.add_argument("batch_id", help="Unique batch identifier")
    run_cmd.add_argument("-i", "--input", help="Input directory with audio files")
    run_cmd.add_argument("-o", "--output", help="Output directory")
    run_cmd.add_argument("-c", "--checkpoint", help="Checkpoint directory")
    run_cmd.add_argument("-t", "--transcripts", help="Pre-existing transcripts file (JSON)")
    run_cmd.add_argument("-m", "--model", default="gpt-4o-mini", help="LLM model to use")
    run_cmd.add_argument("-f", "--formats", default="json,excel", help="Export formats (comma-separated)")
    run_cmd.add_argument("--no-compression", action="store_true", help="Disable transcript compression")
    run_cmd.add_argument("--no-resume", action="store_true", help="Don't resume from checkpoint")
    run_cmd.set_defaults(func=cmd_run)

    # --- status: inspect a batch's checkpoint manifest ---
    status_cmd = subparsers.add_parser("status", help="Show pipeline status")
    status_cmd.add_argument("batch_id", help="Batch ID to check")
    status_cmd.add_argument("-c", "--checkpoint", help="Checkpoint directory")
    status_cmd.set_defaults(func=cmd_status)

    # --- export: re-export existing results (stub) ---
    export_cmd = subparsers.add_parser("export", help="Export results")
    export_cmd.add_argument("batch_id", help="Batch ID to export")
    export_cmd.add_argument("-f", "--format", choices=["json", "excel", "pdf"], default="json")
    export_cmd.add_argument("-o", "--output", help="Output directory")
    export_cmd.set_defaults(func=cmd_export)

    args = parser.parse_args()

    # No subcommand: show help and exit successfully.
    if not args.command:
        parser.print_help()
        return 0

    setup_logging(args.verbose)
    return args.func(args)
|
|
|
|
|
|
# Script entry point: exit with the CLI's return code.
if __name__ == "__main__":
    sys.exit(main())
|