feat: Add Streamlit dashboard with Blueprint compliance (v2.1.0)
Dashboard Features: - 8 navigation sections: Overview, Outcomes, Poor CX, FCR, Churn, Agent, Call Explorer, Export - Beyond Brand Identity styling (colors #6D84E3, Outfit font) - RCA Sankey diagram (Driver → Outcome → Churn Risk flow) - Correlation heatmaps (driver co-occurrence, driver-outcome) - Outcome Deep Dive (root causes, correlation, duration analysis) - Export functionality (Excel, HTML, JSON) Blueprint Compliance: - FCR: 4 categories (Primera Llamada/Rellamada × Sin/Con Riesgo de Fuga) - Churn: Binary view (Sin Riesgo de Fuga / En Riesgo de Fuga) - Agent: Talento Para Replicar / Oportunidades de Mejora - Fixed FCR rate calculation (only FIRST_CALL counts as success) Technical: - Streamlit + Plotly for interactive visualizations - Light theme configuration (.streamlit/config.toml) - Fixed Plotly colorbar titlefont deprecation Documentation: - Updated PROJECT_CONTEXT.md, TODO.md, CHANGELOG.md - Added 4 new technical decisions (TD-014 to TD-017) - Created TROUBLESHOOTING.md with 10 common issues Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
394
tests/unit/test_features.py
Normal file
394
tests/unit/test_features.py
Normal file
@@ -0,0 +1,394 @@
|
||||
"""
|
||||
CXInsights - Feature Extraction Tests
|
||||
|
||||
Tests for deterministic feature extraction.
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from src.features.event_detector import EventDetector, EventDetectorConfig, detect_events
|
||||
from src.features.extractor import FeatureExtractor, extract_features
|
||||
from src.features.turn_metrics import TurnMetricsCalculator, calculate_turn_metrics
|
||||
from src.models.call_analysis import EventType
|
||||
from src.transcription.models import SpeakerTurn, Transcript, TranscriptMetadata
|
||||
|
||||
|
||||
@pytest.fixture
def sample_transcript():
    """Return a six-turn Spanish call transcript (call_id ``TEST001``).

    The call contains an agent greeting, a customer cancellation request,
    an agent hold announcement, the agent resuming after a long gap with an
    offer, a customer refusal, and an agent transfer announcement.
    Metadata reports 120 seconds of audio and two speakers.
    """
    # One row per turn, in call order: (speaker, text, start_time, end_time).
    turn_rows = [
        ("agent", "Buenos días, ¿en qué puedo ayudarle?", 0.0, 3.0),
        ("customer", "Hola, quiero cancelar mi servicio.", 3.5, 6.5),
        (
            "agent",
            "Entiendo. Un momento, por favor, le pongo en espera mientras consulto.",
            7.0,
            12.0,
        ),
        # Silence gap (hold): nothing is spoken between 12.0s and 45.0s.
        (
            "agent",
            "Gracias por la espera. Le cuento que tenemos una oferta especial.",
            45.0,
            52.0,
        ),
        ("customer", "No me interesa, es demasiado caro.", 52.5, 56.0),
        (
            "agent",
            "Le voy a transferir con el departamento de retenciones.",
            56.5,
            61.0,
        ),
    ]
    call_turns = [
        SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
        for who, said, begin, finish in turn_rows
    ]
    call_metadata = TranscriptMetadata(
        audio_duration_sec=120.0,
        audio_file="TEST001.mp3",
        provider="test",
        speaker_count=2,
    )
    return Transcript(call_id="TEST001", turns=call_turns, metadata=call_metadata)
|
||||
|
||||
|
||||
@pytest.fixture
def transcript_with_interruptions():
    """Return a three-turn transcript (``TEST002``) whose turns overlap in time.

    Each turn after the first begins before the previous speaker's
    ``end_time``, giving two overlapping pairs.
    """
    # One row per turn: (speaker, text, start_time, end_time).
    turn_rows = [
        ("agent", "Le explico cómo funciona el proceso...", 0.0, 5.0),
        # Starts at 4.5s, before the agent's turn ends at 5.0s.
        ("customer", "Pero es que yo ya lo sé...", 4.5, 7.0),
        # Starts at 6.8s, before the customer's turn ends at 7.0s.
        ("agent", "Perdone, le decía que...", 6.8, 10.0),
    ]
    call_turns = [
        SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
        for who, said, begin, finish in turn_rows
    ]
    call_metadata = TranscriptMetadata(
        audio_duration_sec=60.0,
        audio_file="TEST002.mp3",
        provider="test",
    )
    return Transcript(call_id="TEST002", turns=call_turns, metadata=call_metadata)
|
||||
|
||||
|
||||
@pytest.fixture
def transcript_with_silences():
    """Return a three-turn transcript (``TEST003``) with two long pauses.

    The turns are separated by a 10-second gap (3.0s to 13.0s) and an
    8-second gap (16.0s to 24.0s).
    """
    # One row per turn: (speaker, text, start_time, end_time).
    turn_rows = [
        ("agent", "Voy a comprobar su cuenta.", 0.0, 3.0),
        # 10 second gap
        ("agent", "Ya tengo la información.", 13.0, 16.0),
        # 8 second gap
        ("customer", "¿Y qué dice?", 24.0, 26.0),
    ]
    call_turns = [
        SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
        for who, said, begin, finish in turn_rows
    ]
    call_metadata = TranscriptMetadata(
        audio_duration_sec=30.0,
        audio_file="TEST003.mp3",
        provider="test",
    )
    return Transcript(call_id="TEST003", turns=call_turns, metadata=call_metadata)
|
||||
|
||||
|
||||
class TestEventDetector:
    """Tests for EventDetector and the ``detect_events`` helper."""

    def test_detect_hold_start(self, sample_transcript):
        """At least one HOLD_START event is found in the sample call."""
        events = detect_events(sample_transcript)

        # Expected trigger: "Un momento, por favor, le pongo en espera".
        hold_starts = [e for e in events if e.event_type == EventType.HOLD_START]
        assert len(hold_starts) >= 1

    def test_detect_hold_end(self, sample_transcript):
        """At least one HOLD_END event is found in the sample call."""
        events = detect_events(sample_transcript)

        # Expected trigger: "Gracias por la espera".
        hold_ends = [e for e in events if e.event_type == EventType.HOLD_END]
        assert len(hold_ends) >= 1

    def test_detect_transfer(self, sample_transcript):
        """At least one TRANSFER event is found in the sample call."""
        events = detect_events(sample_transcript)

        # Expected trigger: "Le voy a transferir".
        transfers = [e for e in events if e.event_type == EventType.TRANSFER]
        assert len(transfers) >= 1

    def test_detect_silence(self, transcript_with_silences):
        """Both gaps longer than the 5-second threshold are reported as SILENCE."""
        config = EventDetectorConfig(silence_threshold_sec=5.0)
        detector = EventDetector(config)
        events = detector.detect_all(transcript_with_silences)

        silences = [e for e in events if e.event_type == EventType.SILENCE]
        assert len(silences) == 2  # Two gaps > 5 seconds
        # Durations are computed from float timestamps, so compare with a
        # tolerance instead of exact equality.
        assert silences[0].duration_sec == pytest.approx(10.0)
        assert silences[1].duration_sec == pytest.approx(8.0)

    def test_detect_interruptions(self, transcript_with_interruptions):
        """Each of the two overlapping turn pairs yields an INTERRUPTION event."""
        events = detect_events(transcript_with_interruptions)

        interruptions = [e for e in events if e.event_type == EventType.INTERRUPTION]
        assert len(interruptions) == 2  # Two overlapping segments

    def test_events_sorted_by_time(self, sample_transcript):
        """Events come back in non-decreasing ``start_time`` order."""
        events = detect_events(sample_transcript)

        # Compare each event with its immediate predecessor.
        for previous, current in zip(events, events[1:]):
            assert current.start_time >= previous.start_time

    def test_event_has_observed_source(self, sample_transcript):
        """Every detected event carries ``source == "observed"``."""
        events = detect_events(sample_transcript)

        for event in events:
            assert event.source == "observed"
|
||||
|
||||
|
||||
class TestTurnMetrics:
    """Tests for turn-level metrics produced by ``calculate_turn_metrics``."""

    def test_turn_counts(self, sample_transcript):
        """The sample call has six turns: four by the agent, two by the customer."""
        result = calculate_turn_metrics(sample_transcript)

        assert result.total_turns == 6
        assert result.agent_turns == 4
        assert result.customer_turns == 2

    def test_talk_ratios(self, sample_transcript):
        """Each talk/silence ratio lies in [0, 1] and their sum does not exceed 1.1."""
        result = calculate_turn_metrics(sample_transcript)

        agent_share = result.agent_talk_ratio
        customer_share = result.customer_talk_ratio
        silent_share = result.silence_ratio

        # Every individual ratio must be a valid fraction.
        assert 0 <= agent_share <= 1
        assert 0 <= customer_share <= 1
        assert 0 <= silent_share <= 1

        # The combined share is only bounded from above, with a little
        # headroom for rounding.
        combined = agent_share + customer_share + silent_share
        assert combined <= 1.1

    def test_interruption_count(self, transcript_with_interruptions):
        """The overlapping transcript yields an interruption count of two."""
        result = calculate_turn_metrics(transcript_with_interruptions)

        assert result.interruption_count == 2

    def test_avg_turn_duration(self, sample_transcript):
        """The average turn duration is strictly positive for a non-empty call."""
        result = calculate_turn_metrics(sample_transcript)

        assert result.avg_turn_duration_sec > 0

    def test_metrics_has_observed_source(self, sample_transcript):
        """The metrics object carries ``source == "observed"``."""
        result = calculate_turn_metrics(sample_transcript)

        assert result.source == "observed"

    def test_empty_transcript(self):
        """A transcript with no turns produces zero for every turn count."""
        no_audio = TranscriptMetadata(
            audio_duration_sec=0.0,
            audio_file="empty.mp3",
            provider="test",
        )
        blank_call = Transcript(call_id="EMPTY", turns=[], metadata=no_audio)

        result = calculate_turn_metrics(blank_call)

        assert result.total_turns == 0
        assert result.agent_turns == 0
        assert result.customer_turns == 0
|
||||
|
||||
|
||||
class TestFeatureExtractor:
    """Tests for the end-to-end ``extract_features`` entry point."""

    def test_extract_features(self, sample_transcript):
        """Call id, audio duration and language are carried onto the features."""
        extracted = extract_features(sample_transcript)

        assert extracted.call_id == "TEST001"
        assert extracted.audio_duration_sec == 120.0
        assert extracted.language == "es"

    def test_features_have_events(self, sample_transcript):
        """The extracted features contain at least one detected event."""
        extracted = extract_features(sample_transcript)

        assert len(extracted.events) > 0

    def test_features_have_metrics(self, sample_transcript):
        """Turn metrics are attached and count all six sample turns."""
        extracted = extract_features(sample_transcript)
        turn_metrics = extracted.turn_metrics

        assert turn_metrics is not None
        assert turn_metrics.total_turns == 6

    def test_hold_aggregation(self, sample_transcript):
        """The sample call aggregates to at least one hold."""
        extracted = extract_features(sample_transcript)

        assert extracted.hold_count >= 1

    def test_transfer_aggregation(self, sample_transcript):
        """The sample call aggregates to at least one transfer."""
        extracted = extract_features(sample_transcript)

        assert extracted.transfer_count >= 1

    def test_silence_aggregation(self, transcript_with_silences):
        """The silence fixture aggregates to exactly two silences."""
        extracted = extract_features(transcript_with_silences)

        assert extracted.silence_count == 2

    def test_interruption_aggregation(self, transcript_with_interruptions):
        """The interruption fixture aggregates to exactly two interruptions."""
        extracted = extract_features(transcript_with_interruptions)

        assert extracted.interruption_count == 2

    def test_deterministic_output(self, sample_transcript):
        """Two extractions of the same transcript agree on counts and event total."""
        first_run = extract_features(sample_transcript)
        second_run = extract_features(sample_transcript)

        # Identical input must yield identical aggregates on every run.
        assert first_run.hold_count == second_run.hold_count
        assert first_run.transfer_count == second_run.transfer_count
        assert first_run.silence_count == second_run.silence_count
        assert len(first_run.events) == len(second_run.events)
|
||||
|
||||
|
||||
class TestSpanishPatterns:
    """Tests for Spanish language pattern detection.

    Each phrase is its own parametrized case, so a failing phrase is
    reported individually and does not hide the phrases after it.
    """

    @staticmethod
    def _single_agent_turn_transcript(text):
        """Build a transcript holding one 3-second agent turn that says ``text``."""
        return Transcript(
            call_id="TEST",
            turns=[
                SpeakerTurn(
                    speaker="agent",
                    text=text,
                    start_time=0.0,
                    end_time=3.0,
                ),
            ],
            metadata=TranscriptMetadata(
                audio_duration_sec=10.0,
                audio_file="test.mp3",
                provider="test",
            ),
        )

    @pytest.mark.parametrize(
        ("text", "should_match"),
        [
            ("Un momento, por favor", True),
            ("Le voy a poner en espera", True),
            ("Espere un segundo", True),
            ("No cuelgue", True),
            ("Déjeme verificar", True),
            ("Buenos días", False),
            ("Gracias por llamar", False),
        ],
    )
    def test_hold_patterns_spanish(self, text, should_match):
        """Hold phrases produce a HOLD_START event; neutral phrases produce none."""
        events = detect_events(self._single_agent_turn_transcript(text))
        hold_starts = [e for e in events if e.event_type == EventType.HOLD_START]

        if should_match:
            assert len(hold_starts) >= 1, f"Should match: {text}"
        else:
            assert len(hold_starts) == 0, f"Should not match: {text}"

    @pytest.mark.parametrize(
        ("text", "should_match"),
        [
            ("Le voy a transferir con el departamento de ventas", True),
            ("Le paso con mi compañero", True),
            ("Le comunico con facturación", True),
            ("Va a ser transferido", True),
            ("Gracias por su paciencia", False),
        ],
    )
    def test_transfer_patterns_spanish(self, text, should_match):
        """Transfer phrases produce a TRANSFER event; neutral phrases produce none."""
        events = detect_events(self._single_agent_turn_transcript(text))
        transfers = [e for e in events if e.event_type == EventType.TRANSFER]

        if should_match:
            assert len(transfers) >= 1, f"Should match: {text}"
        else:
            assert len(transfers) == 0, f"Should not match: {text}"
|
||||
Reference in New Issue
Block a user