diff --git a/ml/decision_tree.v b/ml/decision_tree.v
new file mode 100644
index 000000000..83606216a
--- /dev/null
+++ b/ml/decision_tree.v
@@ -0,0 +1,207 @@
+module ml
+
+import math
+
+pub struct Sample {
+pub mut:
+	features []f64
+pub:
+	label int
+}
+
+pub struct Dataset {
+pub mut:
+	samples []Sample
+pub:
+	n_features int
+	n_classes int
+}
+
+struct Node {
+mut:
+	feature int
+	threshold f64
+	label int
+	left &Node
+	right &Node
+}
+
+pub struct DecisionTree {
+mut:
+	root &Node
+	max_depth int
+	min_samples_split int
+}
+
+pub fn DecisionTree.new(max_depth int, min_samples_split int) &DecisionTree {
+	return &DecisionTree{
+		root: &Node(unsafe { nil })
+		max_depth: max_depth
+		min_samples_split: min_samples_split
+	}
+}
+
+pub fn index_of_max(arr []int) int {
+	mut max_index := 0
+	for i := 1; i < arr.len; i++ {
+		if arr[i] > arr[max_index] {
+			max_index = i
+		}
+	}
+	return max_index
+}
+
+pub fn create_dataset(n_features int, n_classes int) &Dataset {
+	return &Dataset{
+		samples: []Sample{}
+		n_features: n_features
+		n_classes: n_classes
+	}
+}
+
+pub fn (mut dataset Dataset) add_sample(features []f64, label int) bool {
+	if label < 0 || label >= dataset.n_classes {
+		return false
+	}
+	dataset.samples << Sample{
+		features: features.clone()
+		label: label
+	}
+	return true
+}
+
+pub fn (dataset &Dataset) calculate_entropy() f64 {
+	mut class_counts := []int{len: dataset.n_classes, init: 0}
+	for sample in dataset.samples {
+		class_counts[sample.label]++
+	}
+
+	mut entropy := 0.0
+	for count in class_counts {
+		if count > 0 {
+			p := f64(count) / f64(dataset.samples.len)
+			entropy -= p * math.log2(p)
+		}
+	}
+	return entropy
+}
+
+fn find_best_split(dataset &Dataset) (int, f64, f64) {
+	mut best_gain := -1.0
+	mut best_feature := 0
+	mut best_threshold := 0.0
+
+	for feature in 0 .. dataset.n_features {
+		for sample in dataset.samples {
+			threshold := sample.features[feature]
+			mut left := create_dataset(dataset.n_features, dataset.n_classes)
+			mut right := create_dataset(dataset.n_features, dataset.n_classes)
+
+			for s in dataset.samples {
+				if s.features[feature] <= threshold {
+					left.add_sample(s.features, s.label)
+				} else {
+					right.add_sample(s.features, s.label)
+				}
+			}
+
+			if left.samples.len > 0 && right.samples.len > 0 {
+				p_left := f64(left.samples.len) / f64(dataset.samples.len)
+				p_right := f64(right.samples.len) / f64(dataset.samples.len)
+				gain := dataset.calculate_entropy() - (p_left * left.calculate_entropy() +
+					p_right * right.calculate_entropy())
+
+				if gain > best_gain {
+					best_gain = gain
+					best_feature = feature
+					best_threshold = threshold
+				}
+			}
+		}
+	}
+
+	return best_feature, best_threshold, best_gain
+}
+
+fn build_tree(dataset &Dataset, max_depth int, min_samples_split int) &Node {
+	if dataset.samples.len < min_samples_split || max_depth == 0 {
+		mut class_counts := []int{len: dataset.n_classes, init: 0}
+		for sample in dataset.samples {
+			class_counts[sample.label]++
+		}
+		label := index_of_max(class_counts)
+		return &Node{
+			feature: -1
+			threshold: 0
+			label: label
+			left: &Node(unsafe { nil })
+			right: &Node(unsafe { nil })
+		}
+	}
+
+	best_feature, best_threshold, best_gain := find_best_split(dataset)
+
+	if best_gain <= 0 {
+		mut class_counts := []int{len: dataset.n_classes, init: 0}
+		for sample in dataset.samples {
+			class_counts[sample.label]++
+		}
+		label := index_of_max(class_counts)
+		return &Node{
+			feature: -1
+			threshold: 0
+			label: label
+			left: &Node(unsafe { nil })
+			right: &Node(unsafe { nil })
+		}
+	}
+
+	mut left := create_dataset(dataset.n_features, dataset.n_classes)
+	mut right := create_dataset(dataset.n_features, dataset.n_classes)
+
+	for sample in dataset.samples {
+		if sample.features[best_feature] <= best_threshold {
+			left.add_sample(sample.features, sample.label)
+		} else {
+			right.add_sample(sample.features, sample.label)
+		}
+	}
+
+	left_subtree := build_tree(left, max_depth - 1, min_samples_split)
+	right_subtree := build_tree(right, max_depth - 1, min_samples_split)
+
+	return &Node{
+		feature: best_feature
+		threshold: best_threshold
+		label: -1
+		left: left_subtree
+		right: right_subtree
+	}
+}
+
+pub fn (mut dt DecisionTree) train(dataset &Dataset) {
+	dt.root = build_tree(dataset, dt.max_depth, dt.min_samples_split)
+}
+
+pub fn (dt &DecisionTree) predict(features []f64) int {
+	return predict_recursive(dt.root, features)
+}
+
+fn predict_recursive(node &Node, features []f64) int {
+	if node.left == unsafe { nil } && node.right == unsafe { nil } {
+		return node.label
+	}
+
+	if features[node.feature] <= node.threshold {
+		return predict_recursive(node.left, features)
+	} else {
+		return predict_recursive(node.right, features)
+	}
+}
+
+pub fn calculate_information_gain(parent &Dataset, left &Dataset, right &Dataset) f64 {
+	p_left := f64(left.samples.len) / f64(parent.samples.len)
+	p_right := f64(right.samples.len) / f64(parent.samples.len)
+	return parent.calculate_entropy() - (p_left * left.calculate_entropy() +
+		p_right * right.calculate_entropy())
+}
diff --git a/ml/decision_tree_test.v b/ml/decision_tree_test.v
new file mode 100644
index 000000000..9be2efbd9
--- /dev/null
+++ b/ml/decision_tree_test.v
@@ -0,0 +1,93 @@
+module ml
+
+import math
+
+fn test_decision_tree_creation() {
+	max_depth := 3
+	min_samples_split := 2
+	dt := DecisionTree.new(max_depth, min_samples_split)
+	assert dt.max_depth == max_depth
+	assert dt.min_samples_split == min_samples_split
+}
+
+fn test_dataset_creation() {
+	n_features := 3
+	n_classes := 4
+	dataset := create_dataset(n_features, n_classes)
+	assert dataset.n_features == n_features
+	assert dataset.n_classes == n_classes
+	assert dataset.samples.len == 0
+}
+
+fn test_add_sample() {
+	mut dataset := create_dataset(3, 4)
+	features := [1.0, 2.0, 3.0]
+	label := 2
+	assert dataset.add_sample(features, label) == true
+	assert dataset.samples.len == 1
+	assert dataset.samples[0].features == features
+	assert dataset.samples[0].label == label
+
+	// Test invalid label
+	assert dataset.add_sample(features, 5) == false
+	assert dataset.samples.len == 1
+}
+
+fn test_entropy_calculation() {
+	mut dataset := create_dataset(3, 4)
+	dataset.add_sample([1.0, 2.0, 0.5], 0)
+	dataset.add_sample([2.0, 3.0, 1.0], 1)
+	dataset.add_sample([3.0, 4.0, 1.5], 2)
+	dataset.add_sample([4.0, 5.0, 2.0], 3)
+	dataset.add_sample([2.5, 3.5, 1.2], 1)
+
+	entropy := dataset.calculate_entropy()
+	expected_entropy := 1.9219280948873623 // Manually calculated
+	assert math.abs(entropy - expected_entropy) < 1e-6
+}
+
+fn test_decision_tree_training_and_prediction() {
+	mut dataset := create_dataset(3, 4)
+	dataset.add_sample([1.0, 2.0, 0.5], 0)
+	dataset.add_sample([2.0, 3.0, 1.0], 1)
+	dataset.add_sample([3.0, 4.0, 1.5], 2)
+	dataset.add_sample([4.0, 5.0, 2.0], 3)
+	dataset.add_sample([2.5, 3.5, 1.2], 1)
+
+	mut dt := DecisionTree.new(3, 2)
+	dt.train(dataset)
+
+	// Test predictions
+	assert dt.predict([2.5, 3.5, 1.3]) == 1 // Manually calculated
+}
+
+fn test_information_gain() {
+	mut parent := create_dataset(3, 3)
+	parent.add_sample([2.0, 3.5, 1.1], 0)
+	parent.add_sample([3.0, 4.0, 1.5], 1)
+	parent.add_sample([1.5, 2.0, 0.5], 0)
+	parent.add_sample([2.5, 3.0, 1.0], 1)
+	parent.add_sample([4.0, 5.0, 2.0], 2)
+
+	mut left := create_dataset(3, 3)
+	left.add_sample([2.0, 3.5, 1.1], 0)
+	left.add_sample([1.5, 2.0, 0.5], 0)
+
+	mut right := create_dataset(3, 3)
+	right.add_sample([3.0, 4.0, 1.5], 1)
+	right.add_sample([2.5, 3.0, 1.0], 1)
+	right.add_sample([4.0, 5.0, 2.0], 2)
+
+	info_gain := calculate_information_gain(parent, left, right)
+	expected_gain := 0.9709505944546686 // Manually calculated
+	assert math.abs(info_gain - expected_gain) < 1e-6
+}
+
+fn main() {
+	test_decision_tree_creation()
+	test_dataset_creation()
+	test_add_sample()
+	test_entropy_calculation()
+	test_decision_tree_training_and_prediction()
+	test_information_gain()
+}
diff --git a/ml/random_forest.v b/ml/random_forest.v
new file mode 100644
index 000000000..0f21632d8
--- /dev/null
+++ b/ml/random_forest.v
@@ -0,0 +1,171 @@
+module ml
+
+import rand
+
+pub struct RandomForest {
+pub mut:
+	trees []DecisionTree
+	n_trees int
+	max_depth int
+	min_samples_split int
+	feature_subset_size int
+}
+
+pub fn RandomForest.new(n_trees int, max_depth int, min_samples_split int, feature_subset_size int) &RandomForest {
+	return &RandomForest{
+		trees: []DecisionTree{}
+		n_trees: n_trees
+		max_depth: max_depth
+		min_samples_split: min_samples_split
+		feature_subset_size: feature_subset_size
+	}
+}
+
+fn bootstrap_sample(dataset &Dataset) &Dataset {
+	mut bootstrap := create_dataset(dataset.n_features, dataset.n_classes)
+	for _ in 0 .. dataset.samples.len {
+		sample_index := rand.intn(dataset.samples.len) or { 0 }
+		sample := dataset.samples[sample_index]
+		bootstrap.add_sample(sample.features, sample.label)
+	}
+	return bootstrap
+}
+
+fn select_feature_subset(n_features int, subset_size int) []int {
+	mut features := []int{len: n_features, init: index}
+	rand.shuffle(mut features) or { return features[..subset_size] }
+	return features[..subset_size]
+}
+
+pub fn (mut rf RandomForest) train(dataset &Dataset) {
+	for _ in 0 .. rf.n_trees {
+		mut tree := DecisionTree.new(rf.max_depth, rf.min_samples_split)
+		bootstrap := bootstrap_sample(dataset)
+
+		feature_subset := select_feature_subset(dataset.n_features, rf.feature_subset_size)
+
+		tree.train_with_feature_subset(bootstrap, feature_subset)
+		rf.trees << tree
+	}
+}
+
+pub fn (rf &RandomForest) predict(features []f64) int {
+	if rf.trees.len == 0 {
+		return -1
+	}
+
+	// Majority vote across trees, keyed by the predicted class label.
+	mut votes := map[int]int{}
+	for tree in rf.trees {
+		votes[tree.predict(features)]++
+	}
+	mut best_label := -1
+	mut best_count := 0
+	for label, count in votes {
+		if count > best_count {
+			best_count = count
+			best_label = label
+		}
+	}
+	return best_label
+}
+
+pub fn (mut dt DecisionTree) train_with_feature_subset(dataset &Dataset, feature_subset []int) {
+	dt.root = build_tree_with_feature_subset(dataset, dt.max_depth, dt.min_samples_split,
+		feature_subset)
+}
+
+fn build_tree_with_feature_subset(dataset &Dataset, max_depth int, min_samples_split int, feature_subset []int) &Node {
+	if dataset.samples.len < min_samples_split || max_depth == 0 {
+		mut class_counts := []int{len: dataset.n_classes, init: 0}
+		for sample in dataset.samples {
+			class_counts[sample.label]++
+		}
+		label := index_of_max(class_counts)
+		return &Node{
+			feature: -1
+			threshold: 0
+			label: label
+			left: &Node(unsafe { nil })
+			right: &Node(unsafe { nil })
+		}
+	}
+
+	best_feature, best_threshold, best_gain := find_best_split_with_subset(dataset, feature_subset)
+
+	if best_gain <= 0 {
+		mut class_counts := []int{len: dataset.n_classes, init: 0}
+		for sample in dataset.samples {
+			class_counts[sample.label]++
+		}
+		label := index_of_max(class_counts)
+		return &Node{
+			feature: -1
+			threshold: 0
+			label: label
+			left: &Node(unsafe { nil })
+			right: &Node(unsafe { nil })
+		}
+	}
+
+	mut left := create_dataset(dataset.n_features, dataset.n_classes)
+	mut right := create_dataset(dataset.n_features, dataset.n_classes)
+
+	for sample in dataset.samples {
+		if sample.features[best_feature] <= best_threshold {
+			left.add_sample(sample.features, sample.label)
+		} else {
+			right.add_sample(sample.features, sample.label)
+		}
+	}
+
+	left_subtree := build_tree_with_feature_subset(left, max_depth - 1, min_samples_split,
+		feature_subset)
+	right_subtree := build_tree_with_feature_subset(right, max_depth - 1, min_samples_split,
+		feature_subset)
+
+	return &Node{
+		feature: best_feature
+		threshold: best_threshold
+		label: -1
+		left: left_subtree
+		right: right_subtree
+	}
+}
+
+fn find_best_split_with_subset(dataset &Dataset, feature_subset []int) (int, f64, f64) {
+	mut best_gain := -1.0
+	mut best_feature := 0
+	mut best_threshold := 0.0
+
+	for feature in feature_subset {
+		for sample in dataset.samples {
+			threshold := sample.features[feature]
+			mut left := create_dataset(dataset.n_features, dataset.n_classes)
+			mut right := create_dataset(dataset.n_features, dataset.n_classes)
+
+			for s in dataset.samples {
+				if s.features[feature] <= threshold {
+					left.add_sample(s.features, s.label)
+				} else {
+					right.add_sample(s.features, s.label)
+				}
+			}
+
+			if left.samples.len > 0 && right.samples.len > 0 {
+				p_left := f64(left.samples.len) / f64(dataset.samples.len)
+				p_right := f64(right.samples.len) / f64(dataset.samples.len)
+				gain := dataset.calculate_entropy() - (p_left * left.calculate_entropy() +
+					p_right * right.calculate_entropy())
+
+				if gain > best_gain {
+					best_gain = gain
+					best_feature = feature
+					best_threshold = threshold
+				}
+			}
+		}
+	}
+
+	return best_feature, best_threshold, best_gain
+}
diff --git a/ml/random_forest_test.v b/ml/random_forest_test.v
new file mode 100644
index 000000000..a3bf9d2d6
--- /dev/null
+++ b/ml/random_forest_test.v
@@ -0,0 +1,62 @@
+module ml
+
+import rand
+
+fn test_random_forest_creation() {
+	rf := RandomForest.new(10, 5, 2, 3)
+	assert rf.n_trees == 10
+	assert rf.max_depth == 5
+	assert rf.min_samples_split == 2
+	assert rf.feature_subset_size == 3
+	assert rf.trees.len == 0
+}
+
+fn test_bootstrap_sample() {
+	mut dataset := create_dataset(5, 2)
+	for i in 0 .. 100 {
+		dataset.add_sample([f64(i), f64(i * 2), f64(i * 3), f64(i * 4), f64(i * 5)], i % 2)
+	}
+
+	bootstrap := bootstrap_sample(dataset)
+	assert bootstrap.samples.len == dataset.samples.len
+	assert bootstrap.n_features == dataset.n_features
+	assert bootstrap.n_classes == dataset.n_classes
+}
+
+fn test_select_feature_subset() {
+	n_features := 10
+	subset_size := 5
+	subset := select_feature_subset(n_features, subset_size)
+	assert subset.len == subset_size
+	assert subset.all(it >= 0 && it < n_features)
+}
+
+fn test_random_forest_train_and_predict() {
+	mut dataset := create_dataset(4, 2)
+	for i in 0 .. 1000 {
+		if i % 2 == 0 {
+			dataset.add_sample([f64(i), f64(i * 2), f64(i * 3), f64(i * 4)], 0)
+		} else {
+			dataset.add_sample([f64(i), f64(i * 2), f64(i * 3), f64(i * 4)], 1)
+		}
+	}
+
+	mut rf := RandomForest.new(10, 5, 5, 2)
+	rf.train(dataset)
+
+	assert rf.trees.len == 10
+
+	for i in 0 .. 100 {
+		features := [f64(i * 10), f64(i * 20), f64(i * 30), f64(i * 40)]
+		prediction := rf.predict(features)
+		assert prediction == 0 || prediction == 1
+	}
+}
+
+fn main() {
+	test_random_forest_creation()
+	test_bootstrap_sample()
+	test_select_feature_subset()
+	test_random_forest_train_and_predict()
+	println('All Random Forest tests passed successfully!')
+}
diff --git a/ml/svm.v b/ml/svm.v
new file mode 100644
index 000000000..e858a5bf6
--- /dev/null
+++ b/ml/svm.v
@@ -0,0 +1,254 @@
+module ml
+
+import math
+import rand
+
+pub struct SVMConfig {
+pub mut:
+	max_iterations int = 1000
+	learning_rate f64 = 0.01
+	tolerance f64 = 1e-6
+	c f64 = 1.0
+	kernel_type KernelType = .linear
+	kernel_param f64 = 1.0
+}
+
+pub enum KernelType {
+	linear
+	polynomial
+	rbf
+	quadratic
+	custom
+}
+
+pub type KernelFunction = fn ([]f64, []f64) f64
+
+pub struct DataPoint {
+pub mut:
+	x []f64
+	y int
+}
+
+pub struct SVMModel {
+pub mut:
+	support_vectors []DataPoint
+	alphas []f64
+	b f64
+	kernel KernelFunction = linear_kernel
+	config SVMConfig
+}
+
+pub struct MulticlassSVM {
+pub mut:
+	models [][]&SVMModel
+}
+
+pub fn linear_kernel(x []f64, y []f64) f64 {
+	return dot_product(x, y)
+}
+
+pub fn polynomial_kernel(degree f64) KernelFunction {
+	return fn [degree] (x []f64, y []f64) f64 {
+		return math.pow(dot_product(x, y) + 1.0, degree)
+	}
+}
+
+pub fn rbf_kernel(gamma f64) KernelFunction {
+	return fn [gamma] (x []f64, y []f64) f64 {
+		diff := vector_subtract(x, y)
+		return math.exp(-gamma * dot_product(diff, diff))
+	}
+}
+
+pub fn quadratic_kernel(x []f64, y []f64) f64 {
+	dot := dot_product(x, y)
+	return dot * dot
+}
+
+pub fn custom_kernel(x []f64, y []f64) f64 {
+	z_x := math.pow(x[0], 2) + math.pow(x[1], 2)
+	z_y := math.pow(y[0], 2) + math.pow(y[1], 2)
+	return z_x * z_y
+}
+
+fn dot_product(a []f64, b []f64) f64 {
+	mut sum := 0.0
+	for i := 0; i < a.len; i++ {
+		sum += a[i] * b[i]
+	}
+	return sum
+}
+
+fn vector_subtract(a []f64, b []f64) []f64 {
+	mut result := []f64{len: a.len}
+	for i := 0; i < a.len; i++ {
+		result[i] = a[i] - b[i]
+	}
+	return result
+}
+
+pub fn train_svm(data []DataPoint, config SVMConfig) &SVMModel {
+	kernel := match config.kernel_type {
+		.linear { linear_kernel }
+		.polynomial { polynomial_kernel(config.kernel_param) }
+		.rbf { rbf_kernel(config.kernel_param) }
+		.quadratic { quadratic_kernel }
+		.custom { custom_kernel }
+	}
+
+	mut model := &SVMModel{
+		config: config
+		kernel: kernel
+	}
+
+	mut alphas := []f64{len: data.len, init: 0.0}
+	mut b := 0.0
+
+	for _ in 0 .. config.max_iterations {
+		mut alpha_pairs_changed := 0
+
+		for i := 0; i < data.len; i++ {
+			ei := predict_raw(model, data[i].x) - f64(data[i].y)
+			if (data[i].y * ei < -config.tolerance && alphas[i] < config.c)
+				|| (data[i].y * ei > config.tolerance && alphas[i] > 0) {
+				mut j := rand.intn(data.len - 1) or { 0 }
+				if j >= i {
+					j += 1
+				}
+
+				ej := predict_raw(model, data[j].x) - f64(data[j].y)
+
+				old_alpha_i, old_alpha_j := alphas[i], alphas[j]
+				l, h := compute_l_h(config.c, alphas[i], alphas[j], data[i].y, data[j].y)
+
+				if l == h {
+					continue
+				}
+
+				eta := 2 * kernel(data[i].x, data[j].x) - kernel(data[i].x, data[i].x) - kernel(data[j].x,
+					data[j].x)
+				if eta >= 0 {
+					continue
+				}
+
+				alphas[j] -= f64(data[j].y) * (ei - ej) / eta
+				alphas[j] = math.max(l, math.min(h, alphas[j]))
+
+				if math.abs(alphas[j] - old_alpha_j) < 1e-5 {
+					continue
+				}
+
+				alphas[i] += f64(data[i].y * data[j].y) * (old_alpha_j - alphas[j])
+
+				b1 := b - ei - data[i].y * (alphas[i] - old_alpha_i) * kernel(data[i].x,
+					data[i].x) - data[j].y * (alphas[j] - old_alpha_j) * kernel(data[i].x,
+					data[j].x)
+				b2 := b - ej - data[i].y * (alphas[i] - old_alpha_i) * kernel(data[i].x,
+					data[j].x) - data[j].y * (alphas[j] - old_alpha_j) * kernel(data[j].x,
+					data[j].x)
+
+				if 0 < alphas[i] && alphas[i] < config.c {
+					b = b1
+				} else if 0 < alphas[j] && alphas[j] < config.c {
+					b = b2
+				} else {
+					b = (b1 + b2) / 2
+				}
+
+				alpha_pairs_changed += 1
+			}
+		}
+
+		if alpha_pairs_changed == 0 {
+			break
+		}
+	}
+
+	model.b = b
+	model.alphas = alphas
+	mut support_vectors := []DataPoint{}
+	for i, d in data {
+		if alphas[i] > 0 {
+			support_vectors << d
+		}
+	}
+	model.support_vectors = support_vectors
+
+	return model
+}
+
+fn compute_l_h(c f64, alpha_i f64, alpha_j f64, y_i int, y_j int) (f64, f64) {
+	if y_i != y_j {
+		return math.max(0.0, alpha_j - alpha_i), math.min(c, c + alpha_j - alpha_i)
+	} else {
+		return math.max(0.0, alpha_i + alpha_j - c), math.min(c, alpha_i + alpha_j)
+	}
+}
+
+pub fn predict_raw(model &SVMModel, x []f64) f64 {
+	mut result := 0.0
+	for i, sv in model.support_vectors {
+		result += model.alphas[i] * f64(sv.y) * model.kernel(x, sv.x)
+	}
+	return result + model.b
+}
+
+pub fn predict(model &SVMModel, x []f64) int {
+	return if predict_raw(model, x) >= 0 { 1 } else { -1 }
+}
+
+pub fn train_multiclass_svm(data []DataPoint, config SVMConfig) &MulticlassSVM {
+	mut classes := []int{}
+	for point in data {
+		if point.y !in classes {
+			classes << point.y
+		}
+	}
+
+	mut models := [][]&SVMModel{len: classes.len, init: []&SVMModel{}}
+
+	for i := 0; i < classes.len; i++ {
+		models[i] = []&SVMModel{len: classes.len, init: 0}
+		for j := i + 1; j < classes.len; j++ {
+			mut binary_data := []DataPoint{}
+			for point in data {
+				if point.y == classes[i] || point.y == classes[j] {
+					binary_data << DataPoint{
+						x: point.x
+						y: if point.y == classes[i] { 1 } else { -1 }
+					}
+				}
+			}
+			models[i][j] = train_svm(binary_data, config)
+		}
+	}
+
+	return &MulticlassSVM{
+		models: models
+	}
+}
+
+pub fn predict_multiclass(model &MulticlassSVM, x []f64) int {
+	mut votes := map[int]int{}
+	for i := 0; i < model.models.len; i++ {
+		for j := i + 1; j < model.models.len; j++ {
+			prediction := predict(model.models[i][j], x)
+			if prediction == 1 {
+				votes[i]++
+			} else {
+				votes[j]++
+			}
+		}
+	}
+
+	mut max_votes := 0
+	mut predicted_class := 0
+	for class, vote_count in votes {
+		if vote_count > max_votes {
+			max_votes = vote_count
+			predicted_class = class
+		}
+	}
+
+	return predicted_class
+}
diff --git a/ml/svm_test.v b/ml/svm_test.v
new file mode 100644
index 000000000..517d51790
--- /dev/null
+++ b/ml/svm_test.v
@@ -0,0 +1,180 @@
+module ml
+
+import rand
+import math
+
+fn test_linear_kernel() {
+	x := [1.0, 2.0]
+	y := [2.0, 3.0]
+	result := linear_kernel(x, y)
+	expected := dot_product(x, y)
+	assert math.abs(result - expected) < 1e-6
+	println('Linear kernel test passed.')
+}
+
+fn test_polynomial_kernel() {
+	x := [1.0, 2.0]
+	y := [2.0, 3.0]
+	degree := 2.0
+	kernel_fn := polynomial_kernel(degree)
+	result := kernel_fn(x, y)
+	expected := math.pow(dot_product(x, y) + 1.0, degree)
+	assert math.abs(result - expected) < 1e-6
+	println('Polynomial kernel test passed.')
+}
+
+fn test_rbf_kernel() {
+	x := [1.0, 2.0]
+	y := [2.0, 3.0]
+	gamma := 0.5
+	kernel_fn := rbf_kernel(gamma)
+	result := kernel_fn(x, y)
+	diff := vector_subtract(x, y)
+	expected := math.exp(-gamma * dot_product(diff, diff))
+	assert math.abs(result - expected) < 1e-6
+	println('RBF kernel test passed.')
+}
+
+fn test_quadratic_kernel() {
+	x := [1.0, 2.0]
+	y := [2.0, 3.0]
+	result := quadratic_kernel(x, y)
+	expected := math.pow(dot_product(x, y), 2)
+	assert math.abs(result - expected) < 1e-6
+	println('Quadratic kernel test passed.')
+}
+
+fn test_custom_kernel() {
+	x := [1.0, 2.0]
+	y := [2.0, 3.0]
+	result := custom_kernel(x, y)
+	z_x := math.pow(x[0], 2) + math.pow(x[1], 2)
+	z_y := math.pow(y[0], 2) + math.pow(y[1], 2)
+	expected := z_x * z_y
+	assert math.abs(result - expected) < 1e-6
+	println('Custom kernel test passed.')
+}
+
+fn test_dot_product() {
+	a := [1.0, 2.0]
+	b := [3.0, 4.0]
+	result := dot_product(a, b)
+	expected := 11.0
+	assert math.abs(result - expected) < 1e-6
+	println('Dot product test passed.')
+}
+
+fn test_vector_subtract() {
+	a := [1.0, 2.0]
+	b := [3.0, 4.0]
+	result := vector_subtract(a, b)
+	expected := [-2.0, -2.0]
+	assert result == expected
+	println('Vector subtract test passed.')
+}
+
+fn test_svm() {
+	data := [
+		DataPoint{
+			x: [2.0, 3.0]
+			y: 1
+		},
+		DataPoint{
+			x: [1.0, 2.0]
+			y: -1
+		},
+		DataPoint{
+			x: [3.0, 3.0]
+			y: 1
+		},
+		DataPoint{
+			x: [2.0, 1.0]
+			y: -1
+		},
+	]
+
+	config := SVMConfig{
+		max_iterations: 100
+		learning_rate: 0.01
+		tolerance: 1e-5
+		c: 1.0
+		kernel_type: .linear
+	}
+
+	model := train_svm(data, config)
+
+	mut predictions := 0
+	for point in data {
+		pred := predict(model, point.x)
+		if pred == point.y {
+			predictions += 1
+		}
+	}
+
+	assert predictions == data.len
+
+	println('SVM model training and prediction test passed.')
+}
+
+fn test_multiclass_svm() {
+	data := [
+		DataPoint{
+			x: [1.0, 2.0]
+			y: 0
+		},
+		DataPoint{
+			x: [2.0, 3.0]
+			y: 1
+		},
+		DataPoint{
+			x: [3.0, 1.0]
+			y: 2
+		},
+		DataPoint{
+			x: [1.5, 2.5]
+			y: 0
+		},
+		DataPoint{
+			x: [2.5, 3.5]
+			y: 1
+		},
+		DataPoint{
+			x: [3.5, 1.5]
+			y: 2
+		},
+	]
+
+	config := SVMConfig{
+		max_iterations: 100
+		learning_rate: 0.01
+		tolerance: 1e-5
+		c: 1.0
+		kernel_type: .linear
+	}
+
+	multiclass_model := train_multiclass_svm(data, config)
+
+	mut correct_predictions := 0
+	for point in data {
+		predicted_class := predict_multiclass(multiclass_model, point.x)
+		if predicted_class == point.y {
+			correct_predictions += 1
+		}
+	}
+
+	assert correct_predictions == data.len
+
+	println('Multiclass SVM model training and prediction test passed.')
+}
+
+fn main() {
+	test_linear_kernel()
+	test_polynomial_kernel()
+	test_rbf_kernel()
+	test_quadratic_kernel()
+	test_custom_kernel()
+	test_dot_product()
+	test_vector_subtract()
+	test_svm()
+	test_multiclass_svm()
+}
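
A minimal usage sketch for reviewers, not part of the patch itself: it assumes the new `ml` module compiles as submitted and is importable from a small driver program; the data points and the labels noted in the comments are illustrative only. It exercises the three entry points added here (`DecisionTree`, `RandomForest`, and `train_svm`/`predict`).

import ml

fn main() {
	// The decision tree and the random forest share the Dataset API.
	mut ds := ml.create_dataset(2, 2)
	ds.add_sample([1.0, 1.0], 0)
	ds.add_sample([1.2, 0.8], 0)
	ds.add_sample([5.0, 5.2], 1)
	ds.add_sample([4.8, 5.1], 1)

	mut dt := ml.DecisionTree.new(3, 2)
	dt.train(ds)
	println(dt.predict([1.1, 0.9])) // point near the first cluster, expected label 0

	mut rf := ml.RandomForest.new(5, 3, 2, 1)
	rf.train(ds)
	println(rf.predict([5.1, 5.0])) // point near the second cluster, expected label 1

	// The SVM API takes DataPoint values with labels in {-1, 1}.
	data := [
		ml.DataPoint{
			x: [2.0, 3.0]
			y: 1
		},
		ml.DataPoint{
			x: [1.0, 1.0]
			y: -1
		},
	]
	model := ml.train_svm(data, ml.SVMConfig{ kernel_type: .linear })
	println(ml.predict(model, [2.5, 3.0])) // prints 1 or -1
}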