|
| 1 | +# tests/test_spacy_nlp.py |
| 2 | +from unittest.mock import MagicMock, patch |
| 3 | +from uuid import UUID |
| 4 | + |
| 5 | +import pytest |
| 6 | + |
| 7 | +from datafog.models.spacy_nlp import AnnotationResult, SpacyAnnotator |
| 8 | + |
| 9 | + |
@patch("datafog.models.spacy_nlp.spacy.load")
def test_annotate_text_basic(mock_spacy_load):
    """
    Test that annotate_text correctly processes text and returns AnnotationResult objects.

    spaCy is mocked: ``spacy.load`` returns a callable mock pipeline whose
    result exposes two fake entities through ``.ents``. The test verifies that
    the model is loaded on first use, that the pipeline receives the input
    text, and that each entity's offsets and label end up on the returned
    AnnotationResult objects.
    """
    test_text = "John lives in London."

    # Arrange: Mock the spaCy NLP object and its return value
    mock_nlp = MagicMock()
    mock_doc = MagicMock()

    # Simulate entities found by spaCy; offsets match the real positions of
    # the words in test_text so the fixture is self-consistent.
    mock_ent1 = MagicMock()
    mock_ent1.start_char = 0
    mock_ent1.end_char = 4  # test_text[0:4] == "John"
    mock_ent1.label_ = "PERSON"

    mock_ent2 = MagicMock()
    mock_ent2.start_char = 14
    mock_ent2.end_char = 20  # test_text[14:20] == "London"
    mock_ent2.label_ = "LOCATION"  # Use valid EntityTypes member

    mock_doc.ents = [mock_ent1, mock_ent2]
    mock_nlp.return_value = mock_doc  # nlp(text) returns the mock_doc
    mock_spacy_load.return_value = mock_nlp  # spacy.load() returns the mock_nlp

    # Instantiate the annotator; spacy.load must only be triggered by
    # annotate_text (verified by the assert_called_once_with check below).
    annotator = SpacyAnnotator()

    # Act: Call the method under test
    results = annotator.annotate_text(test_text)

    # Assert:
    # Check that spacy.load was called (implicitly tests load_model)
    mock_spacy_load.assert_called_once_with(annotator.model_name)
    # Check that the nlp object was called exactly once, with the input text
    mock_nlp.assert_called_once_with(test_text)
    # Check the number of results
    assert len(results) == 2

    # Check the details of the first result
    assert isinstance(results[0], AnnotationResult)
    assert results[0].start == 0
    assert results[0].end == 4
    assert results[0].entity_type == "PERSON"
    assert isinstance(results[0].score, float)

    # Check the details of the second result
    assert isinstance(results[1], AnnotationResult)
    assert results[1].start == 14
    assert results[1].end == 20
    assert results[1].entity_type == "LOCATION"
    assert isinstance(results[1].score, float)
| 62 | + |
| 63 | + |
# Companion case: exercise the branch where the model is already loaded.
@patch("datafog.models.spacy_nlp.spacy.load")
def test_annotate_text_model_already_loaded(mock_spacy_load):
    """
    Verify that annotate_text reuses a preset ``nlp`` object instead of
    calling spacy.load again.
    """
    sample = "Some text."

    # Arrange: a pipeline stub whose document carries no entities.
    empty_doc = MagicMock()
    empty_doc.ents = []
    preloaded_nlp = MagicMock(return_value=empty_doc)
    mock_spacy_load.return_value = preloaded_nlp

    annotator = SpacyAnnotator()
    annotator.nlp = preloaded_nlp  # Pretend the model was loaded earlier.

    # Act
    annotator.annotate_text(sample)

    # Assert: no (re)load happened and the preset pipeline got the text.
    mock_spacy_load.assert_not_called()
    preloaded_nlp.assert_called_once_with(sample)
0 commit comments