Skip to content

Commit

Permalink
Enable Opensearch unit tests in Windows CI (#2936)
Browse files Browse the repository at this point in the history
* enable Opensearch unit tests under Win

* move unit tests into a dedicated job

* skip audio tests on missing dependencies

* avoid failing test collection when soundfile is not available

* Update .github/workflows/tests.yml

Co-authored-by: Sara Zan <[email protected]>

Co-authored-by: Sara Zan <[email protected]>
  • Loading branch information
masci and ZanSara authored Aug 3, 2022
1 parent 1b238c8 commit 40d07c2
Show file tree
Hide file tree
Showing 4 changed files with 177 additions and 115 deletions.
22 changes: 21 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,27 @@ jobs:
run: |
pylint -ry -j 0 haystack/ rest_api/ ui/
unit-tests:
name: Unit / ${{ matrix.os }}
needs:
- mypy
- pylint
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2

- name: Setup Python
uses: ./.github/actions/python_cache/

- name: Install Haystack
run: pip install .[all]

- name: Run
run: pytest -m "unit" test/

unit-tests-linux:
needs:
Expand All @@ -88,7 +109,6 @@ jobs:
- "pipelines"
- "modeling"
- "others"
- "document_stores/test_opensearch.py"

runs-on: ubuntu-latest
timeout-minutes: 30
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ minversion = "6.0"
addopts = "--strict-markers"
markers = [
"integration: integration tests",
"unit: unit tests",

"generator: generator tests",
"summarizer: summarizer tests",
Expand Down
47 changes: 42 additions & 5 deletions test/document_stores/test_opensearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,9 @@
from haystack.schema import Document, Label, Answer
from haystack.errors import DocumentStoreError


# Skip OpenSearchDocumentStore tests on Windows
pytestmark = pytest.mark.skipif(sys.platform in ["win32", "cygwin"], reason="Opensearch not running on Windows CI")

# Being all the tests in this module, ideally we wouldn't need a marker here,
# but this is to allow this test suite to be skipped when running (e.g.)
# `pytest test/document_stores --document-store-type=faiss`
@pytest.mark.opensearch
class TestOpenSearchDocumentStore:

# Constants
Expand Down Expand Up @@ -210,6 +205,7 @@ def test_clone_embedding_field(self, ds, documents):

# Unit tests

@pytest.mark.unit
def test___init___api_key_raises_warning(self, mocked_document_store, caplog):
with caplog.at_level(logging.WARN, logger="haystack.document_stores.opensearch"):
mocked_document_store.__init__(api_key="foo")
Expand All @@ -220,13 +216,15 @@ def test___init___api_key_raises_warning(self, mocked_document_store, caplog):
for r in caplog.records:
assert r.levelname == "WARNING"

@pytest.mark.unit
def test___init___connection_test_fails(self, mocked_document_store):
failing_client = MagicMock()
failing_client.indices.get.side_effect = Exception("The client failed!")
mocked_document_store._init_client.return_value = failing_client
with pytest.raises(ConnectionError):
mocked_document_store.__init__()

@pytest.mark.unit
def test___init___client_params(self, mocked_open_search_init, _init_client_params):
"""
Ensure the Opensearch-py client was initialized with the right params
Expand All @@ -244,44 +242,51 @@ def test___init___client_params(self, mocked_open_search_init, _init_client_para
"connection_class": RequestsHttpConnection,
}

@pytest.mark.unit
def test__init_client_use_system_proxy_use_sys_proxy(self, mocked_open_search_init, _init_client_params):
_init_client_params["use_system_proxy"] = False
OpenSearchDocumentStore._init_client(**_init_client_params)
_, kwargs = mocked_open_search_init.call_args
assert kwargs["connection_class"] == Urllib3HttpConnection

@pytest.mark.unit
def test__init_client_use_system_proxy_dont_use_sys_proxy(self, mocked_open_search_init, _init_client_params):
_init_client_params["use_system_proxy"] = True
OpenSearchDocumentStore._init_client(**_init_client_params)
_, kwargs = mocked_open_search_init.call_args
assert kwargs["connection_class"] == RequestsHttpConnection

@pytest.mark.unit
def test__init_client_auth_methods_username_password(self, mocked_open_search_init, _init_client_params):
_init_client_params["username"] = "user"
_init_client_params["aws4auth"] = None
OpenSearchDocumentStore._init_client(**_init_client_params)
_, kwargs = mocked_open_search_init.call_args
assert kwargs["http_auth"] == ("user", "pass")

@pytest.mark.unit
def test__init_client_auth_methods_aws_iam(self, mocked_open_search_init, _init_client_params):
_init_client_params["username"] = ""
_init_client_params["aws4auth"] = "foo"
OpenSearchDocumentStore._init_client(**_init_client_params)
_, kwargs = mocked_open_search_init.call_args
assert kwargs["http_auth"] == "foo"

@pytest.mark.unit
def test__init_client_auth_methods_no_auth(self, mocked_open_search_init, _init_client_params):
_init_client_params["username"] = ""
_init_client_params["aws4auth"] = None
OpenSearchDocumentStore._init_client(**_init_client_params)
_, kwargs = mocked_open_search_init.call_args
assert "http_auth" not in kwargs

@pytest.mark.unit
def test_query_by_embedding_raises_if_missing_field(self, mocked_document_store):
mocked_document_store.embedding_field = ""
with pytest.raises(DocumentStoreError):
mocked_document_store.query_by_embedding(self.query_emb)

@pytest.mark.unit
def test_query_by_embedding_filters(self, mocked_document_store):
expected_filters = {"type": "article", "date": {"$gte": "2015-01-01", "$lt": "2021-01-01"}}
mocked_document_store.query_by_embedding(self.query_emb, filters=expected_filters)
Expand All @@ -293,13 +298,15 @@ def test_query_by_embedding_filters(self, mocked_document_store):
{"range": {"date": {"gte": "2015-01-01", "lt": "2021-01-01"}}},
]

@pytest.mark.unit
def test_query_by_embedding_return_embedding_false(self, mocked_document_store):
mocked_document_store.return_embedding = False
mocked_document_store.query_by_embedding(self.query_emb)
# assert the resulting body is consistent with the `excluded_meta_data` value
_, kwargs = mocked_document_store.client.search.call_args
assert kwargs["body"]["_source"] == {"excludes": ["embedding"]}

@pytest.mark.unit
def test_query_by_embedding_excluded_meta_data_return_embedding_true(self, mocked_document_store):
"""
Test that when `return_embedding==True` the field should NOT be excluded even if it
Expand All @@ -312,6 +319,7 @@ def test_query_by_embedding_excluded_meta_data_return_embedding_true(self, mocke
# we expect "embedding" was removed from the final query
assert kwargs["body"]["_source"] == {"excludes": ["foo"]}

@pytest.mark.unit
def test_query_by_embedding_excluded_meta_data_return_embedding_false(self, mocked_document_store):
"""
Test that when `return_embedding==False`, the final query excludes the `embedding` field
Expand All @@ -324,6 +332,7 @@ def test_query_by_embedding_excluded_meta_data_return_embedding_false(self, mock
_, kwargs = mocked_document_store.client.search.call_args
assert kwargs["body"]["_source"] == {"excludes": ["foo", "embedding"]}

@pytest.mark.unit
def test__create_document_index_with_alias(self, mocked_document_store, caplog):
mocked_document_store.client.indices.exists_alias.return_value = True

Expand All @@ -332,6 +341,7 @@ def test__create_document_index_with_alias(self, mocked_document_store, caplog):

assert f"Index name {self.index_name} is an alias." in caplog.text

@pytest.mark.unit
def test__create_document_index_wrong_mapping_raises(self, mocked_document_store, index):
"""
Ensure the method raises if we specify a field in `search_fields` that's not text
Expand All @@ -342,6 +352,7 @@ def test__create_document_index_wrong_mapping_raises(self, mocked_document_store
with pytest.raises(Exception, match=f"The search_field 'age' of index '{self.index_name}' with type 'integer'"):
mocked_document_store._create_document_index(self.index_name)

@pytest.mark.unit
def test__create_document_index_create_mapping_if_missing(self, mocked_document_store, index):
mocked_document_store.client.indices.exists.return_value = True
mocked_document_store.client.indices.get.return_value = {self.index_name: index}
Expand All @@ -354,6 +365,7 @@ def test__create_document_index_create_mapping_if_missing(self, mocked_document_
assert kwargs["index"] == self.index_name
assert "doesnt_have_a_mapping" in kwargs["body"]["properties"]

@pytest.mark.unit
def test__create_document_index_with_bad_field_raises(self, mocked_document_store, index):
mocked_document_store.client.indices.exists.return_value = True
mocked_document_store.client.indices.get.return_value = {self.index_name: index}
Expand All @@ -364,6 +376,7 @@ def test__create_document_index_with_bad_field_raises(self, mocked_document_stor
):
mocked_document_store._create_document_index(self.index_name)

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_but_no_method(self, mocked_document_store, index):
"""
We call the method passing a properly mapped field but without the `method` specified in the mapping
Expand All @@ -381,6 +394,7 @@ def test__create_document_index_with_existing_mapping_but_no_method(self, mocked
# False but I'm not sure this is by design
assert mocked_document_store.embeddings_field_supports_similarity is False

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_similarity(self, mocked_document_store, index):
mocked_document_store.client.indices.exists.return_value = True
mocked_document_store.client.indices.get.return_value = {self.index_name: index}
Expand All @@ -390,6 +404,7 @@ def test__create_document_index_with_existing_mapping_similarity(self, mocked_do
mocked_document_store._create_document_index(self.index_name)
assert mocked_document_store.embeddings_field_supports_similarity is True

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_similarity_mismatch(
self, mocked_document_store, index, caplog
):
Expand All @@ -403,6 +418,7 @@ def test__create_document_index_with_existing_mapping_similarity_mismatch(
assert "Embedding field 'vec' is optimized for similarity 'dot_product'." in caplog.text
assert mocked_document_store.embeddings_field_supports_similarity is False

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_adjust_params_hnsw_default(
self, mocked_document_store, index
):
Expand All @@ -420,6 +436,7 @@ def test__create_document_index_with_existing_mapping_adjust_params_hnsw_default
_, kwargs = mocked_document_store.client.indices.put_settings.call_args
assert kwargs["body"] == {"knn.algo_param.ef_search": 20}

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_adjust_params_hnsw(self, mocked_document_store, index):
"""
Test a value of `knn.algo_param` that needs to be adjusted
Expand All @@ -436,6 +453,7 @@ def test__create_document_index_with_existing_mapping_adjust_params_hnsw(self, m
_, kwargs = mocked_document_store.client.indices.put_settings.call_args
assert kwargs["body"] == {"knn.algo_param.ef_search": 20}

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_adjust_params_flat_default(
self, mocked_document_store, index
):
Expand All @@ -451,6 +469,7 @@ def test__create_document_index_with_existing_mapping_adjust_params_flat_default

mocked_document_store.client.indices.put_settings.assert_not_called

@pytest.mark.unit
def test__create_document_index_with_existing_mapping_adjust_params_hnsw(self, mocked_document_store, index):
"""
Test a value of `knn.algo_param` that needs to be adjusted
Expand All @@ -467,6 +486,7 @@ def test__create_document_index_with_existing_mapping_adjust_params_hnsw(self, m
_, kwargs = mocked_document_store.client.indices.put_settings.call_args
assert kwargs["body"] == {"knn.algo_param.ef_search": 512}

@pytest.mark.unit
def test__create_document_index_no_index_custom_mapping(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = False
mocked_document_store.custom_mapping = {"mappings": {"properties": {"a_number": {"type": "integer"}}}}
Expand All @@ -475,6 +495,7 @@ def test__create_document_index_no_index_custom_mapping(self, mocked_document_st
_, kwargs = mocked_document_store.client.indices.create.call_args
assert kwargs["body"] == {"mappings": {"properties": {"a_number": {"type": "integer"}}}}

@pytest.mark.unit
def test__create_document_index_no_index_no_mapping(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = False
mocked_document_store._create_document_index(self.index_name)
Expand Down Expand Up @@ -502,6 +523,7 @@ def test__create_document_index_no_index_no_mapping(self, mocked_document_store)
"settings": {"analysis": {"analyzer": {"default": {"type": "standard"}}}, "index": {"knn": True}},
}

@pytest.mark.unit
def test__create_document_index_no_index_no_mapping_with_synonyms(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = False
mocked_document_store.search_fields = ["occupation"]
Expand Down Expand Up @@ -542,6 +564,7 @@ def test__create_document_index_no_index_no_mapping_with_synonyms(self, mocked_d
},
}

@pytest.mark.unit
def test__create_document_index_no_index_no_mapping_with_embedding_field(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = False
mocked_document_store.embedding_field = "vec"
Expand Down Expand Up @@ -575,13 +598,15 @@ def test__create_document_index_no_index_no_mapping_with_embedding_field(self, m
},
}

@pytest.mark.unit
def test__create_document_index_client_failure(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = False
mocked_document_store.client.indices.create.side_effect = RequestError

with pytest.raises(RequestError):
mocked_document_store._create_document_index(self.index_name)

@pytest.mark.unit
def test__get_embedding_field_mapping_flat(self, mocked_document_store):
mocked_document_store.index_type = "flat"

Expand All @@ -596,6 +621,7 @@ def test__get_embedding_field_mapping_flat(self, mocked_document_store):
},
}

@pytest.mark.unit
def test__get_embedding_field_mapping_hnsw(self, mocked_document_store):
mocked_document_store.index_type = "hnsw"

Expand All @@ -610,6 +636,7 @@ def test__get_embedding_field_mapping_hnsw(self, mocked_document_store):
},
}

@pytest.mark.unit
def test__get_embedding_field_mapping_wrong(self, mocked_document_store, caplog):
mocked_document_store.index_type = "foo"

Expand All @@ -623,19 +650,22 @@ def test__get_embedding_field_mapping_wrong(self, mocked_document_store, caplog)
"method": {"space_type": "innerproduct", "name": "hnsw", "engine": "nmslib"},
}

@pytest.mark.unit
def test__create_label_index_already_exists(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = True

mocked_document_store._create_label_index("foo")
mocked_document_store.client.indices.create.assert_not_called()

@pytest.mark.unit
def test__create_label_index_client_error(self, mocked_document_store):
mocked_document_store.client.indices.exists.return_value = False
mocked_document_store.client.indices.create.side_effect = RequestError

with pytest.raises(RequestError):
mocked_document_store._create_label_index("foo")

@pytest.mark.unit
def test__get_vector_similarity_query_support_true(self, mocked_document_store):
mocked_document_store.embedding_field = "FooField"
mocked_document_store.embeddings_field_supports_similarity = True
Expand All @@ -644,6 +674,7 @@ def test__get_vector_similarity_query_support_true(self, mocked_document_store):
"bool": {"must": [{"knn": {"FooField": {"vector": self.query_emb.tolist(), "k": 3}}}]}
}

@pytest.mark.unit
def test__get_vector_similarity_query_support_false(self, mocked_document_store):
mocked_document_store.embedding_field = "FooField"
mocked_document_store.embeddings_field_supports_similarity = False
Expand All @@ -664,28 +695,33 @@ def test__get_vector_similarity_query_support_false(self, mocked_document_store)
}
}

@pytest.mark.unit
def test__get_raw_similarity_score_dot(self, mocked_document_store):
mocked_document_store.similarity = "dot_product"
assert mocked_document_store._get_raw_similarity_score(2) == 1
assert mocked_document_store._get_raw_similarity_score(-2) == 1.5

@pytest.mark.unit
def test__get_raw_similarity_score_l2(self, mocked_document_store):
mocked_document_store.similarity = "l2"
assert mocked_document_store._get_raw_similarity_score(1) == 0

@pytest.mark.unit
def test__get_raw_similarity_score_cosine(self, mocked_document_store):
mocked_document_store.similarity = "cosine"
mocked_document_store.embeddings_field_supports_similarity = True
assert mocked_document_store._get_raw_similarity_score(1) == 1
mocked_document_store.embeddings_field_supports_similarity = False
assert mocked_document_store._get_raw_similarity_score(1) == 0

@pytest.mark.unit
def test_clone_embedding_field_duplicate_mapping(self, mocked_document_store, index):
mocked_document_store.client.indices.get.return_value = {self.index_name: index}
mocked_document_store.index = self.index_name
with pytest.raises(Exception, match="age already exists with mapping"):
mocked_document_store.clone_embedding_field("age", "cosine")

@pytest.mark.unit
def test_clone_embedding_field_update_mapping(self, mocked_document_store, index, monkeypatch):
mocked_document_store.client.indices.get.return_value = {self.index_name: index}
mocked_document_store.index = self.index_name
Expand All @@ -709,6 +745,7 @@ def test_clone_embedding_field_update_mapping(self, mocked_document_store, index


class TestOpenDistroElasticsearchDocumentStore:
@pytest.mark.unit
def test_deprecation_notice(self, monkeypatch, caplog):
klass = OpenDistroElasticsearchDocumentStore
monkeypatch.setattr(klass, "_init_client", MagicMock())
Expand Down
Loading

0 comments on commit 40d07c2

Please sign in to comment.