feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)
Dashboard Features:
- 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export
- Beyond Brand Identity styling (colors #6D84E3, Outfit font)
- RCA Sankey diagram (Driver → Outcome → Churn Risk flow)
- Correlation heatmaps (driver co-occurrence, driver-outcome)
- Outcome Deep Dive (root causes, correlation, duration analysis)
- Export functionality (Excel, HTML, JSON)

Blueprint Compliance:
- FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga)
- Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga)
- Agent: Talento Para Replicar / Oportunidades de Mejora
- Fixed FCR rate calculation (only FIRST_CALL counts as success)

Technical:
- Streamlit + Plotly for interactive visualizations
- Light theme configuration (.streamlit/config.toml)
- Fixed Plotly colorbar titlefont deprecation

Documentation:
- Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md
- Added 4 new technical decisions (TD-014 to TD-017)
- Created TROUBLESHOOTING.md with 10 common issues

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
322
tests/unit/test_transcription.py
Normal file
322
tests/unit/test_transcription.py
Normal file
@@ -0,0 +1,322 @@
|
||||
"""
|
||||
CXInsights - Transcription Module Tests
|
||||
|
||||
Unit tests for transcription models and utilities.
|
||||
Does NOT test actual API calls (those are in integration tests).
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from src.transcription.models import (
|
||||
AudioMetadata,
|
||||
SpeakerTurn,
|
||||
Transcript,
|
||||
TranscriptMetadata,
|
||||
TranscriptionConfig,
|
||||
TranscriptionError,
|
||||
TranscriptionResult,
|
||||
TranscriptionStatus,
|
||||
)
|
||||
|
||||
|
||||
class TestSpeakerTurn:
    """Tests for the SpeakerTurn model: stored fields and derived values."""

    def test_create_valid_turn(self):
        """Constructor arguments are readable back as attributes."""
        turn = SpeakerTurn(
            speaker="A",
            text="Hola, buenos días",
            start_time=0.0,
            end_time=2.5,
            confidence=0.95,
        )

        assert turn.speaker == "A"
        assert turn.text == "Hola, buenos días"
        assert turn.start_time == 0.0
        assert turn.end_time == 2.5
        assert turn.confidence == 0.95

    def test_duration_computed(self):
        """A turn spanning 10.0s to 15.5s reports a duration of 5.5s."""
        turn = SpeakerTurn(
            speaker="A",
            text="Test",
            start_time=10.0,
            end_time=15.5,
        )

        # Both endpoints are exactly representable floats, so strict
        # equality is safe here.
        assert turn.duration_sec == 5.5

    def test_word_count_computed(self):
        """word_count matches the number of space-separated words."""
        turn = SpeakerTurn(
            speaker="A",
            text="Esto es una prueba de conteo de palabras",
            start_time=0.0,
            end_time=5.0,
        )

        # Esto / es / una / prueba / de / conteo / de / palabras -> 8 words.
        # "de" appears twice, which is easy to miss when counting by eye.
        assert turn.word_count == 8

    def test_empty_text_word_count(self):
        """Pin the current word count reported for an empty text."""
        turn = SpeakerTurn(
            speaker="A",
            text="",
            start_time=0.0,
            end_time=1.0,
        )

        # The model currently reports 1 (not 0) for empty text; this test
        # documents that quirk rather than endorsing it.
        assert turn.word_count == 1  # Empty string splits to ['']

    def test_confidence_optional(self):
        """Omitting confidence leaves the attribute as None."""
        turn = SpeakerTurn(
            speaker="A",
            text="Test",
            start_time=0.0,
            end_time=1.0,
        )

        assert turn.confidence is None
|
||||
|
||||
|
||||
class TestTranscriptMetadata:
    """Checks for the TranscriptMetadata model."""

    def test_create_metadata(self):
        """Each field passed to the constructor is stored unchanged."""
        expected = {
            "audio_duration_sec": 420.5,
            "audio_file": "call_001.mp3",
            "language": "es",
            "provider": "assemblyai",
            "job_id": "abc123",
        }

        metadata = TranscriptMetadata(**expected)

        # Dict order mirrors the constructor call, so failures surface in the
        # same field order as a hand-written assert list would.
        for field_name, field_value in expected.items():
            assert getattr(metadata, field_name) == field_value

    def test_created_at_default(self):
        """created_at is populated with a datetime when not supplied."""
        metadata = TranscriptMetadata(
            audio_duration_sec=100.0,
            audio_file="test.mp3",
            provider="assemblyai",
        )

        assert metadata.created_at is not None
        assert isinstance(metadata.created_at, datetime)
|
||||
|
||||
|
||||
class TestTranscript:
    """Checks for the Transcript model and its text/speaker helpers."""

    @pytest.fixture
    def sample_transcript(self):
        """Provide a three-turn agent/customer transcript for the tests."""
        # (speaker, text, start_time, end_time) for each turn, in call order.
        dialogue = [
            ("agent", "Buenos días, ¿en qué puedo ayudarle?", 0.0, 3.0),
            ("customer", "Quiero cancelar mi servicio", 3.5, 6.0),
            ("agent", "Entiendo, ¿me puede indicar el motivo?", 6.5, 9.0),
        ]
        turns = [
            SpeakerTurn(
                speaker=who,
                text=said,
                start_time=begin,
                end_time=finish,
            )
            for who, said, begin, finish in dialogue
        ]
        metadata = TranscriptMetadata(
            audio_duration_sec=420.0,
            audio_file="CALL001.mp3",
            provider="assemblyai",
            speaker_count=2,
        )
        return Transcript(call_id="CALL001", turns=turns, metadata=metadata)

    def test_total_turns(self, sample_transcript):
        """total_turns counts every turn in the fixture."""
        assert sample_transcript.total_turns == 3

    def test_total_words(self, sample_transcript):
        """total_words sums the words across all turns."""
        # Per-turn word counts in the fixture:
        #   agent    "Buenos días, ¿en qué puedo ayudarle?"   -> 6
        #   customer "Quiero cancelar mi servicio"            -> 4
        #   agent    "Entiendo, ¿me puede indicar el motivo?" -> 6
        assert sample_transcript.total_words == 16

    def test_get_full_text(self, sample_transcript):
        """get_full_text includes text from both speakers."""
        combined = sample_transcript.get_full_text()

        assert "Buenos días" in combined
        assert "cancelar mi servicio" in combined

    def test_get_speaker_text(self, sample_transcript):
        """get_speaker_text returns only the requested speaker's words."""
        from_agent = sample_transcript.get_speaker_text("agent")
        from_customer = sample_transcript.get_speaker_text("customer")

        assert "Buenos días" in from_agent
        assert "cancelar" not in from_agent
        assert "cancelar mi servicio" in from_customer

    def test_get_speakers(self, sample_transcript):
        """get_speakers lists each distinct speaker exactly once."""
        found = sample_transcript.get_speakers()

        assert len(found) == 2
        for expected_speaker in ("agent", "customer"):
            assert expected_speaker in found
|
||||
|
||||
|
||||
class TestTranscriptionResult:
    """Checks for TranscriptionResult and its success/failure factories."""

    def test_success_result(self):
        """success() yields a COMPLETED result carrying the transcript."""
        metadata = TranscriptMetadata(
            audio_duration_sec=100.0,
            audio_file="test.mp3",
            provider="assemblyai",
        )
        transcript = Transcript(call_id="CALL001", turns=[], metadata=metadata)

        outcome = TranscriptionResult.success(
            call_id="CALL001",
            audio_path=Path("test.mp3"),
            transcript=transcript,
        )

        assert outcome.status == TranscriptionStatus.COMPLETED
        assert outcome.is_success is True
        assert outcome.transcript is not None
        assert outcome.error is None

    def test_failure_result(self):
        """failure() yields a FAILED result carrying the error details."""
        outcome = TranscriptionResult.failure(
            call_id="CALL001",
            audio_path=Path("test.mp3"),
            error=TranscriptionError.API_ERROR,
            error_message="Rate limit exceeded",
        )

        assert outcome.status == TranscriptionStatus.FAILED
        assert outcome.is_success is False
        assert outcome.transcript is None
        assert outcome.error == TranscriptionError.API_ERROR
        assert outcome.error_message == "Rate limit exceeded"

    def test_processing_time_computed(self):
        """processing_time_sec is the gap between start and completion."""
        began = datetime(2024, 1, 1, 12, 0, 0)
        finished = datetime(2024, 1, 1, 12, 0, 30)

        outcome = TranscriptionResult(
            call_id="CALL001",
            audio_path="test.mp3",
            status=TranscriptionStatus.COMPLETED,
            started_at=began,
            completed_at=finished,
        )

        # The two timestamps are 30 seconds apart.
        assert outcome.processing_time_sec == 30.0
|
||||
|
||||
|
||||
class TestAudioMetadata:
    """Checks for the AudioMetadata model and its unit conversions."""

    def test_create_metadata(self):
        """A fully specified AudioMetadata keeps the values it was given."""
        five_mib = 5 * 1024 * 1024  # 5242880 bytes
        seven_minutes = 420.0  # seconds

        metadata = AudioMetadata(
            file_path="/data/audio/call.mp3",
            file_size_bytes=five_mib,
            duration_sec=seven_minutes,
            format="mp3",
            codec="mp3",
            sample_rate=44100,
            channels=2,
            bit_rate=128000,
        )

        assert metadata.file_path == "/data/audio/call.mp3"
        assert metadata.duration_sec == 420.0
        assert metadata.format == "mp3"

    def test_duration_minutes(self):
        """420 seconds is reported as 7.0 minutes."""
        metadata = AudioMetadata(
            file_path="test.mp3",
            file_size_bytes=1000000,
            duration_sec=420.0,
            format="mp3",
        )

        assert metadata.duration_minutes == 7.0

    def test_file_size_mb(self):
        """5242880 bytes is reported as 5.0 MB."""
        five_mib = 5 * 1024 * 1024  # 5242880 bytes

        metadata = AudioMetadata(
            file_path="test.mp3",
            file_size_bytes=five_mib,
            duration_sec=100.0,
            format="mp3",
        )

        assert metadata.file_size_mb == 5.0
|
||||
|
||||
|
||||
class TestTranscriptionConfig:
    """Checks for TranscriptionConfig defaults and overrides."""

    def test_default_config(self):
        """A config built with no arguments exposes the expected defaults."""
        defaults = TranscriptionConfig()

        assert defaults.language_code == "es"
        # Flags expected to be switched on out of the box.
        for enabled_flag in ("speaker_labels", "punctuate", "format_text"):
            assert getattr(defaults, enabled_flag) is True
        assert defaults.auto_chapters is False

    def test_custom_config(self):
        """Explicit keyword arguments override the defaults."""
        overrides = {
            "language_code": "en",
            "speaker_labels": False,
            "auto_chapters": True,
        }

        custom = TranscriptionConfig(**overrides)

        assert custom.language_code == "en"
        assert custom.speaker_labels is False
        assert custom.auto_chapters is True
|
||||
|
||||
|
||||
class TestTranscriptionError:
    """Checks for the TranscriptionError enum."""

    def test_error_values(self):
        """Selected members compare equal to their own name as a string."""
        checked_members = ("FILE_NOT_FOUND", "API_ERROR", "RATE_LIMITED", "TIMEOUT")

        for member_name in checked_members:
            assert getattr(TranscriptionError, member_name) == member_name
|
||||
Reference in New Issue
Block a user