"""
CXInsights - Feature Extraction Tests

Tests for deterministic feature extraction.
"""

import pytest

from src.features.event_detector import EventDetector, EventDetectorConfig, detect_events
from src.features.extractor import FeatureExtractor, extract_features
from src.features.turn_metrics import TurnMetricsCalculator, calculate_turn_metrics
from src.models.call_analysis import EventType
from src.transcription.models import SpeakerTurn, Transcript, TranscriptMetadata


@pytest.fixture
def sample_transcript():
    """Build the six-turn Spanish call (TEST001) shared by most tests.

    The dialogue contains an agent hold announcement, a gap between the
    turn ending at 12.0s and the one starting at 45.0s, a "thanks for
    waiting" turn, and a transfer announcement. The metadata reports 120
    seconds of audio and two speakers.
    """
    # (speaker, text, start_time, end_time) for each turn, in call order.
    dialogue = [
        ("agent", "Buenos días, ¿en qué puedo ayudarle?", 0.0, 3.0),
        ("customer", "Hola, quiero cancelar mi servicio.", 3.5, 6.5),
        (
            "agent",
            "Entiendo. Un momento, por favor, le pongo en espera mientras consulto.",
            7.0,
            12.0,
        ),
        # Silence gap (hold) sits between the previous turn and this one.
        (
            "agent",
            "Gracias por la espera. Le cuento que tenemos una oferta especial.",
            45.0,
            52.0,
        ),
        ("customer", "No me interesa, es demasiado caro.", 52.5, 56.0),
        (
            "agent",
            "Le voy a transferir con el departamento de retenciones.",
            56.5,
            61.0,
        ),
    ]
    turns = [
        SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
        for who, said, begin, finish in dialogue
    ]
    metadata = TranscriptMetadata(
        audio_duration_sec=120.0,
        audio_file="TEST001.mp3",
        provider="test",
        speaker_count=2,
    )
    return Transcript(call_id="TEST001", turns=turns, metadata=metadata)


@pytest.fixture
def transcript_with_interruptions():
    """Build a three-turn call (TEST002) whose consecutive turns overlap.

    Each turn after the first starts before the previous turn has ended,
    giving two overlapping segments.
    """
    # (speaker, text, start_time, end_time) for each turn, in call order.
    dialogue = [
        ("agent", "Le explico cómo funciona el proceso...", 0.0, 5.0),
        # Starts at 4.5s, before the agent's turn ends at 5.0s.
        ("customer", "Pero es que yo ya lo sé...", 4.5, 7.0),
        # Starts at 6.8s, before the customer's turn ends at 7.0s.
        ("agent", "Perdone, le decía que...", 6.8, 10.0),
    ]
    turns = [
        SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
        for who, said, begin, finish in dialogue
    ]
    metadata = TranscriptMetadata(
        audio_duration_sec=60.0,
        audio_file="TEST002.mp3",
        provider="test",
    )
    return Transcript(call_id="TEST002", turns=turns, metadata=metadata)


@pytest.fixture
def transcript_with_silences():
    """Build a three-turn call (TEST003) with two long gaps between turns.

    The gaps are 10 seconds (3.0s -> 13.0s) and 8 seconds (16.0s -> 24.0s).
    """
    # (speaker, text, start_time, end_time) for each turn, in call order.
    dialogue = [
        ("agent", "Voy a comprobar su cuenta.", 0.0, 3.0),
        # 10 second gap before this turn.
        ("agent", "Ya tengo la información.", 13.0, 16.0),
        # 8 second gap before this turn.
        ("customer", "¿Y qué dice?", 24.0, 26.0),
    ]
    turns = [
        SpeakerTurn(speaker=who, text=said, start_time=begin, end_time=finish)
        for who, said, begin, finish in dialogue
    ]
    metadata = TranscriptMetadata(
        audio_duration_sec=30.0,
        audio_file="TEST003.mp3",
        provider="test",
    )
    return Transcript(call_id="TEST003", turns=turns, metadata=metadata)


class TestEventDetector:
    """Tests for EventDetector and the detect_events convenience function."""

    def test_detect_hold_start(self, sample_transcript):
        """At least one HOLD_START event is found in the sample call.

        The expected trigger is the agent turn
        "Un momento, por favor, le pongo en espera".
        """
        events = detect_events(sample_transcript)

        hold_starts = [e for e in events if e.event_type == EventType.HOLD_START]
        assert len(hold_starts) >= 1

    def test_detect_hold_end(self, sample_transcript):
        """At least one HOLD_END event is found in the sample call.

        The expected trigger is the agent turn "Gracias por la espera".
        """
        events = detect_events(sample_transcript)

        hold_ends = [e for e in events if e.event_type == EventType.HOLD_END]
        assert len(hold_ends) >= 1

    def test_detect_transfer(self, sample_transcript):
        """At least one TRANSFER event is found in the sample call.

        The expected trigger is the agent turn "Le voy a transferir".
        """
        events = detect_events(sample_transcript)

        transfers = [e for e in events if e.event_type == EventType.TRANSFER]
        assert len(transfers) >= 1

    def test_detect_silence(self, transcript_with_silences):
        """Both gaps longer than the 5-second threshold are reported."""
        config = EventDetectorConfig(silence_threshold_sec=5.0)
        detector = EventDetector(config)
        events = detector.detect_all(transcript_with_silences)

        silences = [e for e in events if e.event_type == EventType.SILENCE]
        assert len(silences) == 2  # Two gaps > 5 seconds
        # Durations are floats (presumably derived from timestamp
        # arithmetic), so compare with a tolerance instead of exact equality.
        assert silences[0].duration_sec == pytest.approx(10.0)
        assert silences[1].duration_sec == pytest.approx(8.0)

    def test_detect_interruptions(self, transcript_with_interruptions):
        """Both overlapping segments are reported as interruptions."""
        events = detect_events(transcript_with_interruptions)

        interruptions = [e for e in events if e.event_type == EventType.INTERRUPTION]
        assert len(interruptions) == 2  # Two overlapping segments

    def test_events_sorted_by_time(self, sample_transcript):
        """Events come back in non-decreasing start-time order."""
        events = detect_events(sample_transcript)

        # Guard against a vacuous pass: an empty list is trivially sorted.
        assert events, "Expected at least one event in the sample transcript"

        start_times = [event.start_time for event in events]
        assert start_times == sorted(start_times)

    def test_event_has_observed_source(self, sample_transcript):
        """Every detected event carries source='observed'."""
        events = detect_events(sample_transcript)

        # Guard against a vacuous pass: the loop below checks nothing when
        # no events were detected.
        assert events, "Expected at least one event in the sample transcript"

        for event in events:
            assert event.source == "observed"


class TestTurnMetrics:
    """Checks on the metrics returned by calculate_turn_metrics."""

    def test_turn_counts(self, sample_transcript):
        """Total, agent, and customer turn counts match the sample call."""
        result = calculate_turn_metrics(sample_transcript)

        counts = (result.total_turns, result.agent_turns, result.customer_turns)
        assert counts == (6, 4, 2)

    def test_talk_ratios(self, sample_transcript):
        """Each talk/silence ratio is in [0, 1] and their sum is bounded."""
        result = calculate_turn_metrics(sample_transcript)

        ratios = (
            result.agent_talk_ratio,
            result.customer_talk_ratio,
            result.silence_ratio,
        )

        # Every individual ratio must be a valid fraction.
        for ratio in ratios:
            assert 0 <= ratio <= 1

        # Only an upper bound is enforced on the sum; the 0.1 slack allows
        # for rounding.
        assert sum(ratios) <= 1.1

    def test_interruption_count(self, transcript_with_interruptions):
        """The overlapping-turn fixture yields two interruptions."""
        result = calculate_turn_metrics(transcript_with_interruptions)

        assert result.interruption_count == 2

    def test_avg_turn_duration(self, sample_transcript):
        """The average turn duration is strictly positive."""
        result = calculate_turn_metrics(sample_transcript)

        assert result.avg_turn_duration_sec > 0

    def test_metrics_has_observed_source(self, sample_transcript):
        """The metrics object carries source='observed'."""
        result = calculate_turn_metrics(sample_transcript)

        assert result.source == "observed"

    def test_empty_transcript(self):
        """A transcript with no turns produces zero for every turn count."""
        no_audio = TranscriptMetadata(
            audio_duration_sec=0.0,
            audio_file="empty.mp3",
            provider="test",
        )
        blank = Transcript(call_id="EMPTY", turns=[], metadata=no_audio)

        result = calculate_turn_metrics(blank)

        counts = (result.total_turns, result.agent_turns, result.customer_turns)
        assert counts == (0, 0, 0)


class TestFeatureExtractor:
    """Checks on the aggregate object returned by extract_features."""

    def test_extract_features(self, sample_transcript):
        """Call id, audio duration, and language are set on the result."""
        extracted = extract_features(sample_transcript)

        assert extracted.call_id == "TEST001"
        assert extracted.audio_duration_sec == 120.0
        assert extracted.language == "es"

    def test_features_have_events(self, sample_transcript):
        """The result carries a non-empty list of detected events."""
        extracted = extract_features(sample_transcript)

        assert len(extracted.events) > 0

    def test_features_have_metrics(self, sample_transcript):
        """The result carries turn metrics covering all six turns."""
        extracted = extract_features(sample_transcript)
        turn_metrics = extracted.turn_metrics

        assert turn_metrics is not None
        assert turn_metrics.total_turns == 6

    def test_hold_aggregation(self, sample_transcript):
        """The sample call is counted as having at least one hold."""
        extracted = extract_features(sample_transcript)

        assert extracted.hold_count >= 1

    def test_transfer_aggregation(self, sample_transcript):
        """The sample call is counted as having at least one transfer."""
        extracted = extract_features(sample_transcript)

        assert extracted.transfer_count >= 1

    def test_silence_aggregation(self, transcript_with_silences):
        """The silence fixture is counted as having two silences."""
        extracted = extract_features(transcript_with_silences)

        assert extracted.silence_count == 2

    def test_interruption_aggregation(self, transcript_with_interruptions):
        """The overlap fixture is counted as having two interruptions."""
        extracted = extract_features(transcript_with_interruptions)

        assert extracted.interruption_count == 2

    def test_deterministic_output(self, sample_transcript):
        """Two extractions of the same transcript agree on their counts."""
        first = extract_features(sample_transcript)
        second = extract_features(sample_transcript)

        # Same input should produce the same aggregate counts...
        for attr in ("hold_count", "transfer_count", "silence_count"):
            assert getattr(first, attr) == getattr(second, attr)
        # ...and the same number of events.
        assert len(first.events) == len(second.events)


class TestSpanishPatterns:
    """Tests for Spanish language pattern detection.

    Each phrase is a separate parametrized case, so one failing phrase
    does not hide the results for the others.
    """

    @staticmethod
    def _count_events(text, event_type):
        """Count events of *event_type* detected in a one-turn agent call.

        Args:
            text: The utterance placed in the single agent turn.
            event_type: The EventType member to count.

        Returns:
            The number of detected events whose event_type equals
            *event_type*.
        """
        transcript = Transcript(
            call_id="TEST",
            turns=[
                SpeakerTurn(
                    speaker="agent",
                    text=text,
                    start_time=0.0,
                    end_time=3.0,
                ),
            ],
            metadata=TranscriptMetadata(
                audio_duration_sec=10.0,
                audio_file="test.mp3",
                provider="test",
            ),
        )
        return sum(
            1 for event in detect_events(transcript) if event.event_type == event_type
        )

    @pytest.mark.parametrize(
        ("text", "should_match"),
        [
            ("Un momento, por favor", True),
            ("Le voy a poner en espera", True),
            ("Espere un segundo", True),
            ("No cuelgue", True),
            ("Déjeme verificar", True),
            ("Buenos días", False),
            ("Gracias por llamar", False),
        ],
    )
    def test_hold_patterns_spanish(self, text, should_match):
        """Test various Spanish hold patterns."""
        hold_start_count = self._count_events(text, EventType.HOLD_START)

        if should_match:
            assert hold_start_count >= 1, f"Should match: {text}"
        else:
            assert hold_start_count == 0, f"Should not match: {text}"

    @pytest.mark.parametrize(
        ("text", "should_match"),
        [
            ("Le voy a transferir con el departamento de ventas", True),
            ("Le paso con mi compañero", True),
            ("Le comunico con facturación", True),
            ("Va a ser transferido", True),
            ("Gracias por su paciencia", False),
        ],
    )
    def test_transfer_patterns_spanish(self, text, should_match):
        """Test various Spanish transfer patterns."""
        transfer_count = self._count_events(text, EventType.TRANSFER)

        if should_match:
            assert transfer_count >= 1, f"Should match: {text}"
        else:
            assert transfer_count == 0, f"Should not match: {text}"