# API_REFERENCE.md > Documentación de funciones públicas principales --- ## Transcription Module ### `AssemblyAITranscriber` ```python from src.transcription import AssemblyAITranscriber class AssemblyAITranscriber(Transcriber): def __init__(self, api_key: str, language: str = "es"): """ Initialize AssemblyAI transcriber. Args: api_key: AssemblyAI API key language: Language code (default: "es" for Spanish) """ async def transcribe(self, audio_path: Path) -> Transcript: """ Transcribe a single audio file. Args: audio_path: Path to MP3/WAV file Returns: Transcript with speaker diarization Raises: TranscriptionError: If API fails """ async def transcribe_batch( self, audio_paths: list[Path], max_concurrent: int = 5 ) -> list[Transcript]: """ Transcribe multiple audio files in parallel. Args: audio_paths: List of paths to audio files max_concurrent: Max parallel requests Returns: List of Transcripts """ ``` **Example:** ```python transcriber = AssemblyAITranscriber(api_key=os.getenv("ASSEMBLYAI_API_KEY")) transcript = await transcriber.transcribe(Path("call_001.mp3")) print(f"Duration: {transcript.metadata.audio_duration_sec}s") print(f"Turns: {len(transcript.turns)}") ``` --- ## Inference Module ### `CallAnalyzer` ```python from src.inference import CallAnalyzer, AnalyzerConfig class CallAnalyzer: def __init__(self, config: AnalyzerConfig | None = None): """ Initialize call analyzer. Args: config: Analyzer configuration (optional) """ async def analyze(self, transcript: Transcript) -> CallAnalysis: """ Analyze a single transcript. Args: transcript: Transcript to analyze Returns: CallAnalysis with RCA labels and evidence """ async def analyze_batch( self, transcripts: list[Transcript], batch_id: str, progress_callback: Callable | None = None ) -> list[CallAnalysis]: """ Analyze multiple transcripts in parallel. 
Args: transcripts: List of transcripts batch_id: Batch identifier progress_callback: Optional progress callback Returns: List of CallAnalysis results """ ``` **Example:** ```python config = AnalyzerConfig( model="gpt-4o-mini", use_compression=True, max_concurrent=5, ) analyzer = CallAnalyzer(config) analyses = await analyzer.analyze_batch( transcripts=transcripts, batch_id="batch_001", progress_callback=lambda current, total: print(f"{current}/{total}") ) ``` --- ## Aggregation Module ### `aggregate_batch` ```python from src.aggregation import aggregate_batch def aggregate_batch( batch_id: str, analyses: list[CallAnalysis] ) -> BatchAggregation: """ Aggregate call analyses into statistics and RCA tree. Args: batch_id: Batch identifier analyses: List of call analyses Returns: BatchAggregation with frequencies, severities, and RCA tree """ ``` **Example:** ```python aggregation = aggregate_batch("batch_001", analyses) print(f"Lost sales drivers: {len(aggregation.lost_sales_frequencies)}") print(f"Top driver: {aggregation.rca_tree.top_lost_sales_drivers[0]}") ``` --- ## Pipeline Module ### `CXInsightsPipeline` ```python from src.pipeline import CXInsightsPipeline, PipelineConfig class CXInsightsPipeline: def __init__( self, config: PipelineConfig | None = None, progress_callback: Callable | None = None ): """ Initialize pipeline. Args: config: Pipeline configuration progress_callback: Optional progress callback """ def run( self, batch_id: str, audio_files: list[Path] | None = None, transcripts: list[Transcript] | None = None, resume: bool = True ) -> BatchAggregation: """ Run full pipeline. 
Args: batch_id: Batch identifier audio_files: Optional list of audio files transcripts: Optional pre-loaded transcripts resume: Whether to resume from checkpoint Returns: BatchAggregation with full results """ ``` **Example:** ```python config = PipelineConfig( input_dir=Path("data/audio"), output_dir=Path("data/output"), export_formats=["json", "excel", "pdf"], ) pipeline = CXInsightsPipeline(config) result = pipeline.run( batch_id="batch_001", audio_files=list(Path("data/audio").glob("*.mp3")), ) ``` --- ## Export Module ### `export_to_json` ```python from src.exports import export_to_json def export_to_json( batch_id: str, aggregation: BatchAggregation, analyses: list[CallAnalysis], output_dir: Path ) -> Path: """ Export results to JSON files. Args: batch_id: Batch identifier aggregation: Aggregated results analyses: Individual call analyses output_dir: Output directory Returns: Path to summary.json """ ``` ### `export_to_excel` ```python from src.exports import export_to_excel def export_to_excel( batch_id: str, aggregation: BatchAggregation, analyses: list[CallAnalysis], output_dir: Path ) -> Path: """ Export results to an Excel workbook. Creates sheets: - Summary - Lost Sales Drivers - Poor CX Drivers - Call Details - Emergent Patterns Returns: Path to .xlsx file """ ``` ### `export_to_pdf` ```python from src.exports import export_to_pdf def export_to_pdf( batch_id: str, aggregation: BatchAggregation, output_dir: Path ) -> Path: """ Export executive report to PDF/HTML. Falls back to HTML if weasyprint is not installed. Returns: Path to .pdf or .html file """ ``` --- ## Compression Module ### `TranscriptCompressor` ```python from src.compression import TranscriptCompressor class TranscriptCompressor: def compress(self, transcript: Transcript) -> CompressedTranscript: """ Compress transcript by extracting key information. Args: transcript: Full transcript Returns: CompressedTranscript with >60% token reduction """ ``` --- **Última actualización**: 2026-01-19