From 0ce441604d6e61d6b12116b5c5cffd77a2e5b6e2 Mon Sep 17 00:00:00 2001
From: Manik Rana <Manikrana54@gmail.com>
Date: Wed, 31 Jan 2024 19:09:50 +0530
Subject: [PATCH] tests: add tests for `go/mathstats` (#15054)

Signed-off-by: Manik Rana <manikrana54@gmail.com>
---
 go/mathstats/beta_test.go   |  27 +++++
 go/mathstats/sample_test.go | 210 +++++++++++++++++++++++++++++++++++-
 go/mathstats/tdist_test.go  |  21 +++-
 go/mathstats/ttest_test.go  | 163 +++++++++++++++++++++++++++-
 4 files changed, 418 insertions(+), 3 deletions(-)

diff --git a/go/mathstats/beta_test.go b/go/mathstats/beta_test.go
index 2878493a57d..524beda7fcd 100644
--- a/go/mathstats/beta_test.go
+++ b/go/mathstats/beta_test.go
@@ -5,7 +5,10 @@
 package mathstats
 
 import (
+	"math"
 	"testing"
+
+	"github.com/stretchr/testify/assert"
 )
 
 func TestBetaInc(t *testing.T) {
@@ -26,3 +29,27 @@ func TestBetaInc(t *testing.T) {
 			10: 0.01928710937500,
 		})
 }
+
+// TestBetaincPanic expects mathBetaInc to panic with its "failed to converge"
+// message when both a and b are 1e30.
+func TestBetaincPanic(t *testing.T) {
+	defer func() {
+		recovered := recover()
+		if recovered == nil {
+			t.Error("Expected panic, but no panic occurred")
+			return
+		}
+		assert.Contains(t, recovered, "betainc: a or b too big; failed to converge")
+	}()
+
+	const huge = 1e30
+	_ = mathBetaInc(0.5, huge, huge)
+}
+
+// TestMathBetaIncNaN expects mathBetaInc to return NaN when x is negative.
+func TestMathBetaIncNaN(t *testing.T) {
+	const negativeX = -0.1
+	got := mathBetaInc(negativeX, 2.0, 3.0)
+	isNaN := math.IsNaN(got)
+	assert.True(t, isNaN, "Expected NaN for x < 0, got %v", got)
+}
diff --git a/go/mathstats/sample_test.go b/go/mathstats/sample_test.go
index fb9d6dbc6ee..c0da3c2b7f4 100644
--- a/go/mathstats/sample_test.go
+++ b/go/mathstats/sample_test.go
@@ -4,7 +4,12 @@
 
 package mathstats
 
-import "testing"
+import (
+	"math"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
 
 func TestSamplePercentile(t *testing.T) {
 	s := Sample{Xs: []float64{15, 20, 35, 40, 50}}
@@ -19,3 +24,206 @@ func TestSamplePercentile(t *testing.T) {
 		2:   50,
 	})
 }
+
+// TestSamplePercentileEmpty expects Percentile to be NaN when the sample holds no values.
+func TestSamplePercentileEmpty(t *testing.T) {
+	assert.True(t, math.IsNaN((&Sample{Xs: []float64{}}).Percentile(0.5)), "Percentile should return NaN for empty sample")
+}
+
+// TestSampleStdDev checks StdDev against a hand-computed value: the mean is 5,
+// the squared deviations sum to 32, and sqrt(32/7) ≈ 2.138089935299395.
+func TestSampleStdDev(t *testing.T) {
+	sample := Sample{Xs: []float64{2, 4, 4, 4, 5, 5, 7, 9}}
+	const expected = 2.138089935299395
+
+	// Compare within a tolerance: bit-exact float equality is brittle across platforms.
+	assert.InDelta(t, expected, sample.StdDev(), 1e-12)
+}
+
+// TestBounds checks the package-level Bounds helper on sorted, unsorted and
+// empty input; an empty slice is expected to yield NaN for both bounds.
+func TestBounds(t *testing.T) {
+	cases := []struct {
+		xs       []float64
+		min, max float64
+	}{
+		{xs: []float64{15, 20, 35, 40, 50}, min: 15, max: 50},
+		{xs: []float64{}, min: math.NaN(), max: math.NaN()},
+		{xs: []float64{10, 20, 5, 30, 15}, min: 5, max: 30},
+	}
+
+	for _, c := range cases {
+		lo, hi := Bounds(c.xs)
+		if len(c.xs) == 0 {
+			assert.True(t, math.IsNaN(lo), "min value should be NaN")
+			assert.True(t, math.IsNaN(hi), "max value should be NaN")
+			continue
+		}
+		assert.Equal(t, c.min, lo, "min value mismatch")
+		assert.Equal(t, c.max, hi, "max value mismatch")
+	}
+}
+
+// TestSampleBounds checks Sample.Bounds with the Sorted flag both unset and
+// set; an empty sample is expected to yield NaN for both bounds.
+func TestSampleBounds(t *testing.T) {
+	cases := []struct {
+		sample   Sample
+		min, max float64
+	}{
+		{sample: Sample{Xs: []float64{15, 20, 35, 40, 50}, Sorted: false}, min: 15, max: 50},
+		{sample: Sample{Xs: []float64{}, Sorted: false}, min: math.NaN(), max: math.NaN()},
+		{sample: Sample{Xs: []float64{15, 20, 35, 40, 50}, Sorted: true}, min: 15, max: 50},
+	}
+
+	for _, c := range cases {
+		lo, hi := c.sample.Bounds()
+		if len(c.sample.Xs) == 0 {
+			assert.True(t, math.IsNaN(lo), "min value should be NaN")
+			assert.True(t, math.IsNaN(hi), "max value should be NaN")
+			continue
+		}
+		assert.Equal(t, c.min, lo, "min value mismatch")
+		assert.Equal(t, c.max, hi, "max value mismatch")
+	}
+}
+
+// TestVecSum checks vecSum on a non-empty slice and on an empty one (sum 0).
+func TestVecSum(t *testing.T) {
+	cases := []struct {
+		xs   []float64
+		want float64
+	}{
+		{xs: []float64{15, 20, 35, 40, 50}, want: 160},
+		{xs: []float64{}, want: 0},
+	}
+
+	for i := range cases {
+		assert.Equal(t, cases[i].want, vecSum(cases[i].xs), "sum value mismatch")
+	}
+}
+
+// TestSampleSum checks Sample.Sum on a non-empty sample and on an empty one (sum 0).
+func TestSampleSum(t *testing.T) {
+	cases := []struct {
+		sample Sample
+		want   float64
+	}{
+		{sample: Sample{Xs: []float64{15, 20, 35, 40, 50}}, want: 160},
+		{sample: Sample{Xs: []float64{}}, want: 0},
+	}
+
+	for i := range cases {
+		assert.Equal(t, cases[i].want, cases[i].sample.Sum(), "sum value mismatch")
+	}
+}
+
+// TestMean checks Mean on several small inputs; the empty slice is expected to give NaN.
+func TestMean(t *testing.T) {
+	cases := []struct {
+		xs   []float64
+		want float64
+	}{
+		{xs: []float64{1, 2, 3, 4, 5}, want: 3},
+		{xs: []float64{-1, 0, 1}, want: 0},
+		{xs: []float64{}, want: math.NaN()},
+		{xs: []float64{10}, want: 10},
+		{xs: []float64{-2, 2, -2, 2}, want: 0},
+	}
+
+	for _, c := range cases {
+		got := Mean(c.xs)
+		if math.IsNaN(c.want) {
+			assert.True(t, math.IsNaN(got), "Expected NaN")
+			continue
+		}
+		assert.Equal(t, c.want, got, "Mean value mismatch")
+	}
+}
+// TestSampleCopy checks that Copy returns a sample with its own backing data,
+// the same length, and the same Sorted flag as the original.
+func TestSampleCopy(t *testing.T) {
+	original := Sample{Xs: []float64{15, 20, 35, 40, 50}, Sorted: true}
+	dup := original.Copy()
+
+	original.Xs[0] = 100 // a write to the original must not show through the copy
+	assert.NotEqual(t, original.Xs[0], dup.Xs[0], "Original and copied samples should not share data")
+	assert.Equal(t, len(original.Xs), len(dup.Xs), "Length of original and copied samples should be the same")
+	assert.Equal(t, original.Sorted, dup.Sorted, "Sorting status should be the same")
+}
+
+// TestSampleFilterOutliers expects FilterOutliers to drop 200 and keep the other six values.
+func TestSampleFilterOutliers(t *testing.T) {
+	sample := Sample{Xs: []float64{15, 20, 35, 40, 50, 100, 200}}
+	sample.FilterOutliers()
+
+	assert.Equal(t, []float64{15, 20, 35, 40, 50, 100}, sample.Xs, "FilterOutliers should remove outliers")
+}
+
+// TestSampleClear expects Clear to leave the sample with no values and Sorted unset.
+func TestSampleClear(t *testing.T) {
+	sample := &Sample{Xs: []float64{15, 20, 35, 40, 50}, Sorted: true}
+	sample.Clear()
+	assert.Empty(t, sample.Xs, "Clear should reset the sample to contain 0 values")
+	assert.False(t, sample.Sorted, "Sorting status should be false after clearing")
+}
+
+// TestSampleSort checks that Sort returns the values in ascending order,
+// whether or not the sample was already flagged as Sorted.
+func TestSampleSort(t *testing.T) {
+	cases := []struct {
+		sample Sample
+		want   []float64
+	}{
+		{sample: Sample{Xs: []float64{15, 20, 35, 40, 50}, Sorted: false}, want: []float64{15, 20, 35, 40, 50}},
+		{sample: Sample{Xs: []float64{}, Sorted: false}, want: []float64{}},
+		{sample: Sample{Xs: []float64{15, 20, 35, 40, 50}, Sorted: true}, want: []float64{15, 20, 35, 40, 50}},
+		{sample: Sample{Xs: []float64{10, 5, 30, 20, 15}, Sorted: false}, want: []float64{5, 10, 15, 20, 30}},
+	}
+
+	for i := range cases {
+		assert.Equal(t, cases[i].want, cases[i].sample.Sort().Xs, "Sorted values mismatch")
+	}
+}
+
+// TestGeoMean checks Sample.GeoMean on positive values (within a small
+// tolerance) and expects NaN for empty, zero-containing or negative input.
+func TestGeoMean(t *testing.T) {
+	tt := []struct {
+		name     string
+		values   []float64
+		expected float64
+	}{
+		{
+			name:     "Valid_case",
+			values:   []float64{2, 4, 8, 16},
+			expected: 5.65685424949238,
+		},
+		{
+			name:     "Empty_values",
+			values:   []float64{},
+			expected: math.NaN(),
+		},
+		{
+			name:     "Zero_value",
+			values:   []float64{1, 0, 3},
+			expected: math.NaN(),
+		},
+		{
+			name:     "Negative_value",
+			values:   []float64{2, -4, 8, 16},
+			expected: math.NaN(),
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			result := (&Sample{Xs: tc.values}).GeoMean()
+			if math.IsNaN(tc.expected) {
+				assert.True(t, math.IsNaN(result))
+				return
+			}
+			assert.InDelta(t, tc.expected, result, 1e-12)
+		})
+	}
+}
diff --git a/go/mathstats/tdist_test.go b/go/mathstats/tdist_test.go
index b30ba95662b..e243126e47b 100644
--- a/go/mathstats/tdist_test.go
+++ b/go/mathstats/tdist_test.go
@@ -4,7 +4,12 @@
 
 package mathstats
 
-import "testing"
+import (
+	"math"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
 
 func TestT(t *testing.T) {
 	testFunc(t, "PDF(%v|v=1)", TDist{1}.PDF, map[float64]float64{
@@ -93,3 +98,17 @@ func TestT(t *testing.T) {
 		8:   0.99975354666971372,
 		9:   0.9998586600128780})
 }
+// TestCDFNan expects TDist.CDF to return NaN when its argument is NaN.
+func TestCDFNan(t *testing.T) {
+	dist := TDist{V: 1}
+	got := dist.CDF(math.NaN())
+	assert.True(t, math.IsNaN(got), "CDF(NaN) = %v, expected NaN", got)
+}
+
+// TestBounds_tdist expects TDist.Bounds to report the interval [-4, 4] for V = 1.
+func TestBounds_tdist(t *testing.T) {
+	dist := TDist{V: 1}
+	lo, hi := dist.Bounds()
+	assert.Equal(t, -4.0, lo, "Lower bound should be -4")
+	assert.Equal(t, 4.0, hi, "Upper bound should be 4")
+}
diff --git a/go/mathstats/ttest_test.go b/go/mathstats/ttest_test.go
index 0c9b78fdb9f..9c23a24ec29 100644
--- a/go/mathstats/ttest_test.go
+++ b/go/mathstats/ttest_test.go
@@ -4,7 +4,11 @@
 
 package mathstats
 
-import "testing"
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
 
 func TestTTest(t *testing.T) {
 	s1 := Sample{Xs: []float64{2, 1, 3, 4}}
@@ -69,3 +73,160 @@ func TestTTest(t *testing.T) {
 	}, 4, 0, 0, 3,
 		0.5, 1, 0.5)
 }
+
+// TestTwoSampleTTestErrors checks that TwoSampleTTest returns the expected sentinel error and a nil result.
+func TestTwoSampleTTestErrors(t *testing.T) {
+	tt := []struct {
+		name   string
+		x1, x2 TTestSample
+		alt    LocationHypothesis
+		err    error
+	}{
+		{
+			name: "One sample size is 0",
+			x1:   &Sample{Xs: []float64{1, 2, 3}},
+			x2:   &Sample{Xs: []float64{}},
+			alt:  LocationDiffers,
+			err:  ErrSampleSize,
+		},
+		{
+			name: "Both sample sizes are 0",
+			x1:   &Sample{Xs: []float64{}},
+			x2:   &Sample{Xs: []float64{}},
+			alt:  LocationDiffers,
+			err:  ErrSampleSize,
+		},
+		{
+			name: "One sample has zero variance",
+			x1:   &Sample{Xs: []float64{1}},
+			x2:   &Sample{Xs: []float64{1}},
+			alt:  LocationDiffers,
+			err:  ErrZeroVariance,
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			result, err := TwoSampleTTest(tc.x1, tc.x2, tc.alt)
+			assert.ErrorIs(t, err, tc.err) // match the sentinel itself, not a look-alike error value
+			assert.Nil(t, result)
+		})
+	}
+}
+
+// TestTwoSampleWelchTTestErrors checks that TwoSampleWelchTTest returns the expected sentinel error and a nil result.
+func TestTwoSampleWelchTTestErrors(t *testing.T) {
+	tt := []struct {
+		name   string
+		x1, x2 TTestSample
+		alt    LocationHypothesis
+		err    error
+	}{
+		{
+			name: "One sample size is 1",
+			x1:   &Sample{Xs: []float64{1}},
+			x2:   &Sample{Xs: []float64{2, 3, 4}},
+			alt:  LocationDiffers,
+			err:  ErrSampleSize,
+		},
+		{
+			name: "Both sample sizes are 1",
+			x1:   &Sample{Xs: []float64{1}},
+			x2:   &Sample{Xs: []float64{2}},
+			alt:  LocationDiffers,
+			err:  ErrSampleSize,
+		},
+		{
+			name: "One sample has zero variance",
+			x1:   &Sample{Xs: []float64{1, 1, 1}},
+			x2:   &Sample{Xs: []float64{2, 2, 2}},
+			alt:  LocationDiffers,
+			err:  ErrZeroVariance,
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			result, err := TwoSampleWelchTTest(tc.x1, tc.x2, tc.alt)
+			assert.ErrorIs(t, err, tc.err) // match the sentinel itself, not a look-alike error value
+			assert.Nil(t, result)
+		})
+	}
+}
+
+// TestPairedTTestErrors checks that PairedTTest returns the expected sentinel error and a nil result.
+func TestPairedTTestErrors(t *testing.T) {
+	tt := []struct {
+		name   string
+		x1, x2 []float64
+		μ0     float64
+		alt    LocationHypothesis
+		err    error
+	}{
+		{
+			name: "Samples have different lengths",
+			x1:   []float64{1, 2, 3},
+			x2:   []float64{4, 5},
+			μ0:   0,
+			alt:  LocationDiffers,
+			err:  ErrMismatchedSamples,
+		},
+		{
+			name: "Samples have length <= 1",
+			x1:   []float64{1},
+			x2:   []float64{2},
+			μ0:   0,
+			alt:  LocationDiffers,
+			err:  ErrSampleSize,
+		},
+		{
+			name: "Samples result in zero standard deviation",
+			x1:   []float64{1, 1, 1},
+			x2:   []float64{1, 1, 1},
+			μ0:   0,
+			alt:  LocationDiffers,
+			err:  ErrZeroVariance,
+		},
+	}
+
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			result, err := PairedTTest(tc.x1, tc.x2, tc.μ0, tc.alt)
+			assert.ErrorIs(t, err, tc.err) // match the sentinel itself, not a look-alike error value
+			assert.Nil(t, result)
+		})
+	}
+}
+
+// TestOneSampleTTestErrors checks that OneSampleTTest returns the expected sentinel error and a nil result.
+func TestOneSampleTTestErrors(t *testing.T) {
+	tt := []struct {
+		name string
+		x    TTestSample
+		μ0   float64
+		alt  LocationHypothesis
+		err  error
+	}{
+		{
+			name: "Sample size is 0",
+			x:    &Sample{Xs: []float64{}},
+			μ0:   0,
+			alt:  LocationDiffers,
+			err:  ErrSampleSize,
+		},
+		{
+			name: "Sample has zero variance",
+			x:    &Sample{Xs: []float64{1, 1, 1}},
+			μ0:   0,
+			alt:  LocationDiffers,
+			err:  ErrZeroVariance,
+		},
+	}
+	for _, tc := range tt {
+		t.Run(tc.name, func(t *testing.T) {
+			result, err := OneSampleTTest(tc.x, tc.μ0, tc.alt)
+			assert.ErrorIs(t, err, tc.err) // match the sentinel itself, not a look-alike error value
+			assert.Nil(t, result)
+		})
+	}
+}