Dashboard Features:
- 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export
- Beyond Brand Identity styling (colors #6D84E3, Outfit font)
- RCA Sankey diagram (Driver → Outcome → Churn Risk flow)
- Correlation heatmaps (driver co-occurrence, driver-outcome)
- Outcome Deep Dive (root causes, correlation, duration analysis)
- Export functionality (Excel, HTML, JSON)

Blueprint Compliance:
- FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga)
- Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga)
- Agent: Talento Para Replicar / Oportunidades de Mejora
- Fixed FCR rate calculation (only FIRST_CALL counts as success)

Technical:
- Streamlit + Plotly for interactive visualizations
- Light theme configuration (.streamlit/config.toml)
- Fixed Plotly colorbar titlefont deprecation

Documentation:
- Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md
- Added 4 new technical decisions (TD-014 to TD-017)
- Created TROUBLESHOOTING.md with 10 common issues

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
323 lines
9.6 KiB
Python
323 lines
9.6 KiB
Python
"""
|
|
CXInsights - Transcription Module Tests
|
|
|
|
Unit tests for transcription models and utilities.
|
|
Does NOT test actual API calls (those are in integration tests).
|
|
"""
|
|
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
from src.transcription.models import (
|
|
AudioMetadata,
|
|
SpeakerTurn,
|
|
Transcript,
|
|
TranscriptMetadata,
|
|
TranscriptionConfig,
|
|
TranscriptionError,
|
|
TranscriptionResult,
|
|
TranscriptionStatus,
|
|
)
|
|
|
|
|
|
class TestSpeakerTurn:
    """Tests for SpeakerTurn model."""

    def test_create_valid_turn(self):
        """Test that explicitly supplied fields are readable back unchanged."""
        turn = SpeakerTurn(
            speaker="A",
            text="Hola, buenos días",
            start_time=0.0,
            end_time=2.5,
            confidence=0.95,
        )

        assert turn.speaker == "A"
        assert turn.text == "Hola, buenos días"
        assert turn.start_time == 0.0
        assert turn.end_time == 2.5
        assert turn.confidence == 0.95

    def test_duration_computed(self):
        """Test that duration_sec is 5.5 for a turn running from 10.0 to 15.5."""
        turn = SpeakerTurn(
            speaker="A",
            text="Test",
            start_time=10.0,
            end_time=15.5,
        )

        # 10.0, 15.5 and 5.5 are all exactly representable as binary floats,
        # so an exact equality check is safe here.
        assert turn.duration_sec == 5.5

    def test_word_count_computed(self):
        """Test that word_count matches the number of words in the text."""
        turn = SpeakerTurn(
            speaker="A",
            text="Esto es una prueba de conteo de palabras",
            start_time=0.0,
            end_time=5.0,
        )

        # The sentence has eight whitespace-separated tokens:
        # Esto / es / una / prueba / de / conteo / de / palabras
        # ("de" occurs twice and must be counted both times).
        assert turn.word_count == 8

    def test_empty_text_word_count(self):
        """Test word count with empty text."""
        turn = SpeakerTurn(
            speaker="A",
            text="",
            start_time=0.0,
            end_time=1.0,
        )

        # NOTE(review): an expected count of 1 holds if the model counts with
        # "".split(" ") (-> ['']); "".split() would give [] and a count of 0.
        # Confirm against the SpeakerTurn implementation.
        assert turn.word_count == 1  # Empty string splits to ['']

    def test_confidence_optional(self):
        """Test that confidence may be omitted and then reads back as None."""
        turn = SpeakerTurn(
            speaker="A",
            text="Test",
            start_time=0.0,
            end_time=1.0,
        )

        assert turn.confidence is None
|
|
|
|
|
|
class TestTranscriptMetadata:
    """Tests for TranscriptMetadata model."""

    def test_create_metadata(self):
        """Test that each supplied field is readable back with the same value."""
        supplied = {
            "audio_duration_sec": 420.5,
            "audio_file": "call_001.mp3",
            "language": "es",
            "provider": "assemblyai",
            "job_id": "abc123",
        }

        metadata = TranscriptMetadata(**supplied)

        # Check the fields in the order they were supplied.
        for field_name, expected in supplied.items():
            assert getattr(metadata, field_name) == expected

    def test_created_at_default(self):
        """Test that created_at is populated with a datetime when omitted."""
        minimal_fields = dict(
            audio_duration_sec=100.0,
            audio_file="test.mp3",
            provider="assemblyai",
        )
        metadata = TranscriptMetadata(**minimal_fields)

        created = metadata.created_at
        assert created is not None
        assert isinstance(created, datetime)
|
|
|
|
|
|
class TestTranscript:
    """Tests for Transcript model."""

    @pytest.fixture
    def sample_transcript(self):
        """Build a three-turn agent/customer transcript shared by the tests."""
        dialogue = [
            SpeakerTurn(
                speaker="agent",
                text="Buenos días, ¿en qué puedo ayudarle?",
                start_time=0.0,
                end_time=3.0,
            ),
            SpeakerTurn(
                speaker="customer",
                text="Quiero cancelar mi servicio",
                start_time=3.5,
                end_time=6.0,
            ),
            SpeakerTurn(
                speaker="agent",
                text="Entiendo, ¿me puede indicar el motivo?",
                start_time=6.5,
                end_time=9.0,
            ),
        ]
        call_metadata = TranscriptMetadata(
            audio_duration_sec=420.0,
            audio_file="CALL001.mp3",
            provider="assemblyai",
            speaker_count=2,
        )
        return Transcript(
            call_id="CALL001",
            turns=dialogue,
            metadata=call_metadata,
        )

    def test_total_turns(self, sample_transcript):
        """Test that total_turns equals the number of turns in the fixture."""
        assert sample_transcript.total_turns == 3

    def test_total_words(self, sample_transcript):
        """Test that total_words sums the word counts of all turns."""
        # Per-turn word counts in the fixture:
        #   "Buenos días, ¿en qué puedo ayudarle?"   -> 6
        #   "Quiero cancelar mi servicio"            -> 4
        #   "Entiendo, ¿me puede indicar el motivo?" -> 6
        assert sample_transcript.total_words == 16

    def test_get_full_text(self, sample_transcript):
        """Test that the full text contains fragments from both speakers."""
        combined = sample_transcript.get_full_text()
        assert "Buenos días" in combined
        assert "cancelar mi servicio" in combined

    def test_get_speaker_text(self, sample_transcript):
        """Test that per-speaker text only includes that speaker's turns."""
        agent_text = sample_transcript.get_speaker_text("agent")
        customer_text = sample_transcript.get_speaker_text("customer")

        # The agent never says "cancelar"; only the customer does.
        assert "Buenos días" in agent_text
        assert "cancelar" not in agent_text
        assert "cancelar mi servicio" in customer_text

    def test_get_speakers(self, sample_transcript):
        """Test that get_speakers reports each distinct speaker once."""
        found = sample_transcript.get_speakers()

        assert len(found) == 2
        assert "agent" in found
        assert "customer" in found
|
|
|
|
|
|
class TestTranscriptionResult:
    """Tests for TranscriptionResult model."""

    def test_success_result(self):
        """Test that TranscriptionResult.success yields a completed result."""
        empty_transcript = Transcript(
            call_id="CALL001",
            turns=[],
            metadata=TranscriptMetadata(
                audio_duration_sec=100.0,
                audio_file="test.mp3",
                provider="assemblyai",
            ),
        )

        outcome = TranscriptionResult.success(
            call_id="CALL001",
            audio_path=Path("test.mp3"),
            transcript=empty_transcript,
        )

        assert outcome.status == TranscriptionStatus.COMPLETED
        assert outcome.is_success is True
        assert outcome.transcript is not None
        assert outcome.error is None

    def test_failure_result(self):
        """Test that TranscriptionResult.failure carries the error details."""
        outcome = TranscriptionResult.failure(
            call_id="CALL001",
            audio_path=Path("test.mp3"),
            error=TranscriptionError.API_ERROR,
            error_message="Rate limit exceeded",
        )

        assert outcome.status == TranscriptionStatus.FAILED
        assert outcome.is_success is False
        assert outcome.transcript is None
        assert outcome.error == TranscriptionError.API_ERROR
        assert outcome.error_message == "Rate limit exceeded"

    def test_processing_time_computed(self):
        """Test that processing_time_sec is 30.0 for timestamps 30 s apart."""
        began = datetime(2024, 1, 1, 12, 0, 0)
        finished = datetime(2024, 1, 1, 12, 0, 30)

        outcome = TranscriptionResult(
            call_id="CALL001",
            audio_path="test.mp3",
            status=TranscriptionStatus.COMPLETED,
            started_at=began,
            completed_at=finished,
        )

        assert outcome.processing_time_sec == 30.0
|
|
|
|
|
|
class TestAudioMetadata:
    """Tests for AudioMetadata model."""

    def test_create_metadata(self):
        """Test that a fully specified AudioMetadata exposes its fields."""
        audio = AudioMetadata(
            file_path="/data/audio/call.mp3",
            file_size_bytes=5242880,  # 5 MB
            duration_sec=420.0,  # 7 minutes
            format="mp3",
            codec="mp3",
            sample_rate=44100,
            channels=2,
            bit_rate=128000,
        )

        assert audio.file_path == "/data/audio/call.mp3"
        assert audio.duration_sec == 420.0
        assert audio.format == "mp3"

    def test_duration_minutes(self):
        """Test that 420.0 seconds is reported as 7.0 minutes."""
        required_fields = dict(
            file_path="test.mp3",
            file_size_bytes=1000000,
            duration_sec=420.0,
            format="mp3",
        )
        audio = AudioMetadata(**required_fields)

        assert audio.duration_minutes == 7.0

    def test_file_size_mb(self):
        """Test that 5242880 bytes (5 * 1024 * 1024) is reported as 5.0 MB."""
        required_fields = dict(
            file_path="test.mp3",
            file_size_bytes=5242880,  # 5 MB
            duration_sec=100.0,
            format="mp3",
        )
        audio = AudioMetadata(**required_fields)

        assert audio.file_size_mb == 5.0
|
|
|
|
|
|
class TestTranscriptionConfig:
    """Tests for TranscriptionConfig model."""

    def test_default_config(self):
        """Test the values a TranscriptionConfig has when built with no arguments."""
        defaults = TranscriptionConfig()

        assert defaults.language_code == "es"
        assert defaults.speaker_labels is True
        assert defaults.punctuate is True
        assert defaults.format_text is True
        assert defaults.auto_chapters is False

    def test_custom_config(self):
        """Test that explicitly passed options replace the defaults."""
        overrides = dict(
            language_code="en",
            speaker_labels=False,
            auto_chapters=True,
        )
        custom = TranscriptionConfig(**overrides)

        assert custom.language_code == "en"
        assert custom.speaker_labels is False
        assert custom.auto_chapters is True
|
|
|
|
|
|
class TestTranscriptionError:
    """Tests for TranscriptionError enum."""

    def test_error_values(self):
        """Test that selected error members compare equal to their own names."""
        checked_names = (
            "FILE_NOT_FOUND",
            "API_ERROR",
            "RATE_LIMITED",
            "TIMEOUT",
        )

        # Each member is expected to compare equal to the plain string
        # spelling of its name.
        for member_name in checked_names:
            assert getattr(TranscriptionError, member_name) == member_name
|