diff --git a/.github/workflows/column2Vec_test.yml b/.github/workflows/column2Vec_test.yml index 55d5046..7aede4b 100644 --- a/.github/workflows/column2Vec_test.yml +++ b/.github/workflows/column2Vec_test.yml @@ -27,5 +27,5 @@ jobs: - name: Run tests run: | - pytest test/test_column2Vec.py test/test_column2VecCache.py + pytest test/test_column2Vec.py diff --git a/.github/workflows/py_test.yml b/.github/workflows/py_test.yml index 749dfb4..f98708b 100644 --- a/.github/workflows/py_test.yml +++ b/.github/workflows/py_test.yml @@ -34,7 +34,7 @@ jobs: python-tests: env: - TEST_FILES: test/test_types.py test/test_metadata.py test/test_comparator.py + TEST_FILES: test/test_types.py test/test_metadata.py test/test_comparator.py test/test_column2VecCache.py name: Run Python Tests runs-on: ubuntu-latest steps: diff --git a/test/test_column2VecCache.py b/test/test_column2VecCache.py index bfbf4c2..b9102b8 100644 --- a/test/test_column2VecCache.py +++ b/test/test_column2VecCache.py @@ -27,9 +27,10 @@ def setUpClass(self): files = [fileM2] self.data = get_nonnumerical_data(files) for i in self.data: - self.first = self.data[i] + self.first = self.data[i].head(100) break cache.set_file("generated/test.csv") + self.model = SentenceTransformer(MODEL) def setUp(self): cache.clear_cache() @@ -37,12 +38,12 @@ def setUp(self): cache.on() def test_column2vec_as_sentence(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_as_sentence, self.first, model, "a") - second = time_measure_function(column2vec_as_sentence, self.first, model, "a") + first = time_measure_function(column2vec_as_sentence, self.first, self.model, "a") + + second = time_measure_function(column2vec_as_sentence, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_as_sentence, self.first, model, "a") + third = time_measure_function(column2vec_as_sentence, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) @@ -50,37 +51,37 @@ def test_column2vec_as_sentence(self): def test_column2vec_as_sentence_clean(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_as_sentence_clean, self.first, model, "a") - second = time_measure_function(column2vec_as_sentence_clean, self.first, model, "a") + first = time_measure_function(column2vec_as_sentence_clean, self.first, self.model, "a") + + second = time_measure_function(column2vec_as_sentence_clean, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_as_sentence_clean, self.first, model, "a") + third = time_measure_function(column2vec_as_sentence_clean, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) self.assertGreater(third, second) def test_column2vec_as_sentence_clean_uniq(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, model, "a") - second = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, model, "a") + first = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, self.model, "a") + + second = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, model, "a") + third = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) self.assertGreater(third, second) def test_column2vec_avg(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_avg, self.first, model, "a") - second = time_measure_function(column2vec_avg, self.first, model, "a") + first = time_measure_function(column2vec_avg, self.first, self.model, "a") + + second = time_measure_function(column2vec_avg, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_avg, self.first, model, "a") + third = time_measure_function(column2vec_avg, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) @@ -88,12 +89,12 @@ def test_column2vec_avg(self): def test_column2vec_weighted_avg(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_weighted_avg, self.first, model, "a") - second = time_measure_function(column2vec_weighted_avg, self.first, model, "a") + first = time_measure_function(column2vec_weighted_avg, self.first, self.model, "a") + + second = time_measure_function(column2vec_weighted_avg, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_weighted_avg, self.first, model, "a") + third = time_measure_function(column2vec_weighted_avg, self.first, self.model, "a") print(f"{first} : {second} : {third}") @@ -102,12 +103,12 @@ def test_column2vec_weighted_avg(self): def test_column2vec_sum(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_sum, self.first, model, "a") - second = time_measure_function(column2vec_sum, self.first, model, "a") + first = time_measure_function(column2vec_sum, self.first, self.model, "a") + + second = time_measure_function(column2vec_sum, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_sum, self.first, model, "a") + third = time_measure_function(column2vec_sum, self.first, self.model, "a") print(f"{first} : {second} : {third}") @@ -116,13 +117,13 @@ def test_column2vec_sum(self): def test_column2vec_weighted_sum(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_weighted_sum, self.first, model, "a") - second = time_measure_function(column2vec_weighted_sum, self.first, model, "a") + first = time_measure_function(column2vec_weighted_sum, self.first, self.model, "a") + + second = time_measure_function(column2vec_weighted_sum, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_weighted_sum, self.first, model, "a") + third = time_measure_function(column2vec_weighted_sum, self.first, self.model, "a") print(f"{first} : {second} : {third}") @@ -138,10 +139,15 @@ def setUpClass(self): # make an array of all the files files = [fileM2] self.data = get_nonnumerical_data(files) + skip = True for i in self.data: - self.first = self.data[i] + if skip: + skip = False + continue + self.first = self.data[i].head(100) break cache.set_file("cache_test.csv") + self.model = SentenceTransformer(MODEL) def setUp(self): cache.clear_cache() @@ -149,14 +155,14 @@ def setUp(self): cache.on() def test_column2vec_as_sentence(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_as_sentence, self.first, model, "a") + + first = time_measure_function(column2vec_as_sentence, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_as_sentence, self.first, model, "a") + second = time_measure_function(column2vec_as_sentence, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_as_sentence, self.first, model, "a") + third = time_measure_function(column2vec_as_sentence, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) @@ -164,43 +170,43 @@ def test_column2vec_as_sentence(self): def test_column2vec_as_sentence_clean(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_as_sentence_clean, self.first, model, "a") + + first = time_measure_function(column2vec_as_sentence_clean, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_as_sentence_clean, self.first, model, "a") + second = time_measure_function(column2vec_as_sentence_clean, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_as_sentence_clean, self.first, model, "a") + third = time_measure_function(column2vec_as_sentence_clean, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) self.assertGreater(third, second) def test_column2vec_as_sentence_clean_uniq(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, model, "a") + + first = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, model, "a") + second = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, model, "a") + third = time_measure_function(column2vec_as_sentence_clean_uniq, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) self.assertGreater(third, second) def test_column2vec_avg(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_avg, self.first, model, "a") + + first = time_measure_function(column2vec_avg, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_avg, self.first, model, "a") + second = time_measure_function(column2vec_avg, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_avg, self.first, model, "a") + third = time_measure_function(column2vec_avg, self.first, self.model, "a") print(f"{first} : {second} : {third}") self.assertGreater(first, second) @@ -208,14 +214,14 @@ def test_column2vec_avg(self): def test_column2vec_weighted_avg(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_weighted_avg, self.first, model, "a") + + first = time_measure_function(column2vec_weighted_avg, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_weighted_avg, self.first, model, "a") + second = time_measure_function(column2vec_weighted_avg, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_weighted_avg, self.first, model, "a") + third = time_measure_function(column2vec_weighted_avg, self.first, self.model, "a") print(f"{first} : {second} : {third}") @@ -224,14 +230,14 @@ def test_column2vec_weighted_avg(self): def test_column2vec_sum(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_sum, self.first, model, "a") + + first = time_measure_function(column2vec_sum, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_sum, self.first, model, "a") + second = time_measure_function(column2vec_sum, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_sum, self.first, model, "a") + third = time_measure_function(column2vec_sum, self.first, self.model, "a") print(f"{first} : {second} : {third}") @@ -240,15 +246,15 @@ def test_column2vec_sum(self): def test_column2vec_weighted_sum(self): - model = SentenceTransformer(MODEL) - first = time_measure_function(column2vec_weighted_sum, self.first, model, "a") + + first = time_measure_function(column2vec_weighted_sum, self.first, self.model, "a") cache.save_persistently() cache.clear_cache() - second = time_measure_function(column2vec_weighted_sum, self.first, model, "a") + second = time_measure_function(column2vec_weighted_sum, self.first, self.model, "a") cache.off() - third = time_measure_function(column2vec_weighted_sum, self.first, model, "a") + third = time_measure_function(column2vec_weighted_sum, self.first, self.model, "a") print(f"{first} : {second} : {third}")