#!/usr/bin/env python3
"""
CXInsights - Command Line Interface

Main entry point for running the analysis pipeline.
"""

import argparse
import logging
import sys
from pathlib import Path

from dotenv import load_dotenv

# Load environment variables from .env file BEFORE importing project modules,
# since they may read configuration (API keys, etc.) at import time.
load_dotenv()

from src.pipeline import CXInsightsPipeline, PipelineConfig

logger = logging.getLogger(__name__)


def setup_logging(verbose: bool = False) -> None:
    """Configure root logging.

    Args:
        verbose: When True, log at DEBUG level; otherwise INFO.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )


def progress_callback(stage: str, current: int, total: int) -> None:
    """Render an in-place console progress bar for a pipeline stage.

    Args:
        stage: Human-readable stage name, printed as the bar's label.
        current: Number of items completed so far.
        total: Total number of items; no output is produced when <= 0.
    """
    if total > 0:
        pct = current / total * 100
        bar_len = 30
        filled = int(bar_len * current / total)
        bar = "█" * filled + "░" * (bar_len - filled)
        # \r rewrites the same console line on each update.
        print(f"\r{stage}: [{bar}] {pct:.0f}% ({current}/{total})", end="", flush=True)
        if current == total:
            print()  # New line when complete


def cmd_run(args: argparse.Namespace) -> int:
    """Run the analysis pipeline.

    Args:
        args: Parsed CLI arguments from the ``run`` subcommand.

    Returns:
        Process exit code: 0 on success, 1 on any failure.
    """
    print("=" * 60)
    print("CXInsights - Call Analysis Pipeline")
    print("=" * 60)

    # Build config, falling back to the conventional data/ layout.
    config = PipelineConfig(
        input_dir=Path(args.input) if args.input else Path("data/audio"),
        output_dir=Path(args.output) if args.output else Path("data/output"),
        checkpoint_dir=Path(args.checkpoint) if args.checkpoint else Path("data/.checkpoints"),
        inference_model=args.model,
        use_compression=not args.no_compression,
        export_formats=args.formats.split(",") if args.formats else ["json", "excel"],
        auto_resume=not args.no_resume,
    )

    print("\nConfiguration:")
    print(f"  Input:       {config.input_dir}")
    print(f"  Output:      {config.output_dir}")
    print(f"  Model:       {config.inference_model}")
    print(f"  Compression: {'Enabled' if config.use_compression else 'Disabled'}")
    print(f"  Formats:     {', '.join(config.export_formats)}")
    print()

    # Check for transcripts. A nonexistent --transcripts path must be an
    # explicit error: previously it fell through silently and the pipeline
    # could be launched with no input at all.
    transcripts_file = Path(args.transcripts) if args.transcripts else None
    if transcripts_file is not None:
        if not transcripts_file.exists():
            print(f"Error: Transcripts file not found: {transcripts_file}")
            return 1
        print(f"Loading transcripts from: {transcripts_file}")
        # Load transcripts (placeholder - would need actual loading logic)
        print("Note: Transcript loading not fully implemented in CLI")
        return 1

    # Discover audio files. The set union dedupes files matched by more than
    # one pattern; sorting makes processing order deterministic.
    audio_files = sorted(
        set(config.input_dir.glob("*.wav")) | set(config.input_dir.glob("*.mp3"))
    )
    if not audio_files and not transcripts_file:
        print(f"Error: No audio files found in {config.input_dir}")
        print("Please provide audio files or use --transcripts option")
        return 1

    print(f"Found {len(audio_files)} audio files")

    # Run pipeline; progress output is suppressed in quiet mode.
    pipeline = CXInsightsPipeline(
        config=config,
        progress_callback=progress_callback if not args.quiet else None,
    )

    try:
        result = pipeline.run(
            batch_id=args.batch_id,
            audio_files=audio_files if audio_files else None,
            resume=not args.no_resume,
        )

        print("\n" + "=" * 60)
        print("Pipeline Complete!")
        print("=" * 60)
        print("\nResults:")
        print(f"  Total calls: {result.total_calls_processed}")
        print(f"  Successful:  {result.successful_analyses}")
        print(f"  Failed:      {result.failed_analyses}")
        print(f"  Lost sales:  {len(result.lost_sales_frequencies)} drivers")
        print(f"  Poor CX:     {len(result.poor_cx_frequencies)} drivers")

        if result.rca_tree:
            tree = result.rca_tree
            print(f"\n  Top lost sales: {', '.join(tree.top_lost_sales_drivers[:3])}")
            print(f"  Top poor CX:    {', '.join(tree.top_poor_cx_drivers[:3])}")

        print(f"\nOutput: {config.output_dir / args.batch_id}")
        return 0

    except Exception as e:
        # Top-level CLI boundary: log the failure and translate it into a
        # nonzero exit code instead of letting the traceback escape.
        logger.error("Pipeline failed: %s", e)
        if args.verbose:
            import traceback
            traceback.print_exc()
        return 1


def cmd_status(args: argparse.Namespace) -> int:
    """Show pipeline status for a batch from its checkpoint manifest.

    Args:
        args: Parsed CLI arguments from the ``status`` subcommand.

    Returns:
        0 when the manifest was found and printed; 1 otherwise.
    """
    from src.pipeline.models import PipelineManifest

    checkpoint_dir = Path(args.checkpoint) if args.checkpoint else Path("data/.checkpoints")
    manifest_path = checkpoint_dir / f"pipeline_{args.batch_id}.json"

    if not manifest_path.exists():
        print(f"No pipeline found for batch: {args.batch_id}")
        return 1

    manifest = PipelineManifest.load(manifest_path)

    print(f"\nPipeline Status: {manifest.batch_id}")
    print("=" * 50)
    print(f"Status:  {manifest.status.value}")
    print(f"Created: {manifest.created_at}")
    print(f"Total duration: {manifest.total_duration_sec:.1f}s")
    print()
    print("Stages:")
    for stage, stage_manifest in manifest.stages.items():
        status_icon = {
            "pending": "⏳",
            "running": "🔄",
            "completed": "✅",
            "failed": "❌",
            "skipped": "⏭️",
        }.get(stage_manifest.status.value, "?")
        duration = f"({stage_manifest.duration_sec:.1f}s)" if stage_manifest.duration_sec else ""
        print(f"  {status_icon} {stage.value}: {stage_manifest.status.value} {duration}")
        if stage_manifest.processed_items > 0:
            print(f"     Processed: {stage_manifest.processed_items}/{stage_manifest.total_items}")

    return 0


def cmd_export(args: argparse.Namespace) -> int:
    """Export results to different formats (not yet implemented).

    Args:
        args: Parsed CLI arguments from the ``export`` subcommand (unused).

    Returns:
        Always 1, since the command is a stub.
    """
    print("Export command - not yet implemented")
    print("Use the run command with --formats option instead")
    return 1


def main() -> int:
    """Main entry point: parse arguments and dispatch to a subcommand.

    Returns:
        The exit code of the selected subcommand, or 0 when no command
        was given (help is printed instead).
    """
    parser = argparse.ArgumentParser(
        description="CXInsights - Call Center Analysis Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    # Global flags: must appear before the subcommand on the command line.
    parser.add_argument("-v", "--verbose", action="store_true", help="Verbose output")
    parser.add_argument("-q", "--quiet", action="store_true", help="Quiet output (no progress)")

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Run command
    run_parser = subparsers.add_parser("run", help="Run the analysis pipeline")
    run_parser.add_argument("batch_id", help="Unique batch identifier")
    run_parser.add_argument("-i", "--input", help="Input directory with audio files")
    run_parser.add_argument("-o", "--output", help="Output directory")
    run_parser.add_argument("-c", "--checkpoint", help="Checkpoint directory")
    run_parser.add_argument("-t", "--transcripts", help="Pre-existing transcripts file (JSON)")
    run_parser.add_argument("-m", "--model", default="gpt-4o-mini", help="LLM model to use")
    run_parser.add_argument(
        "-f", "--formats", default="json,excel", help="Export formats (comma-separated)"
    )
    run_parser.add_argument(
        "--no-compression", action="store_true", help="Disable transcript compression"
    )
    run_parser.add_argument(
        "--no-resume", action="store_true", help="Don't resume from checkpoint"
    )
    run_parser.set_defaults(func=cmd_run)

    # Status command
    status_parser = subparsers.add_parser("status", help="Show pipeline status")
    status_parser.add_argument("batch_id", help="Batch ID to check")
    status_parser.add_argument("-c", "--checkpoint", help="Checkpoint directory")
    status_parser.set_defaults(func=cmd_status)

    # Export command
    export_parser = subparsers.add_parser("export", help="Export results")
    export_parser.add_argument("batch_id", help="Batch ID to export")
    export_parser.add_argument("-f", "--format", choices=["json", "excel", "pdf"], default="json")
    export_parser.add_argument("-o", "--output", help="Output directory")
    export_parser.set_defaults(func=cmd_export)

    args = parser.parse_args()

    if not args.command:
        parser.print_help()
        return 0

    setup_logging(args.verbose)
    return args.func(args)


if __name__ == "__main__":
    sys.exit(main())