diff --git a/docs/src/tutorials/linear_regression.md b/docs/src/tutorials/linear_regression.md
index 7573372fd7..f56a688575 100644
--- a/docs/src/tutorials/linear_regression.md
+++ b/docs/src/tutorials/linear_regression.md
@@ -104,9 +104,9 @@ julia> custom_model(W, b, x)[1], y[1]
 It does! But the predictions are way off. We need to train the model to improve the predictions, but before training the model we need to define the loss function. The loss function would ideally output a quantity that we will try to minimize during the entire training process. Here we will use the mean sum squared error loss function.
 
 ```jldoctest linear_regression_simple; filter = r"[+-]?([0-9]*[.])?[0-9]+(f[+-]*[0-9])?"
-julia> function custom_loss(W, b, x, y)
-           ŷ = custom_model(W, b, x)
-           sum((y .- ŷ).^2) / length(x)
+julia> function custom_loss(weights, biases, features, labels)
+           ŷ = custom_model(weights, biases, features)  # model predictions for `features`
+           sum((labels .- ŷ).^2) / length(features)  # average over the inputs, not the parameters
        end;
 
 julia> custom_loss(W, b, x, y)
@@ -142,9 +142,9 @@ julia> flux_model(x)[1], y[1]
 It is! The next step would be defining the loss function using `Flux`'s functions -
 
 ```jldoctest linear_regression_simple; filter = r"[+-]?([0-9]*[.])?[0-9]+(f[+-]*[0-9])?"
-julia> function flux_loss(flux_model, x, y)
-           ŷ = flux_model(x)
-           Flux.mse(ŷ, y)
+julia> function flux_loss(flux_model, features, labels)
+           ŷ = flux_model(features)  # predictions for `features`
+           Flux.mse(ŷ, labels)  # mean squared error between predictions and `labels`
        end;
 
 julia> flux_loss(flux_model, x, y)
@@ -214,8 +214,8 @@ The loss went down! This means that we successfully trained our model for one ep
 Let's plug our super training logic inside a function and test it again -
 
 ```jldoctest linear_regression_simple; filter = r"[+-]?([0-9]*[.])?[0-9]+(f[+-]*[0-9])?"
-julia> function train_custom_model!(f_loss, weights, biases, X, y)
-           dLdW, dLdb, _, _ = gradient(f_loss, weights, biases, X, y)
+julia> function train_custom_model!(f_loss, weights, biases, features, labels)
+           dLdW, dLdb, _, _ = gradient(f_loss, weights, biases, features, labels)  # data gradients are discarded
            @. weights = weights - 0.1 * dLdW
            @. biases = biases - 0.1 * dLdb
        end;
@@ -314,9 +314,9 @@ Dense(13 => 1)      # 14 parameters
 Same as before, our next step would be to define a loss function to quantify our accuracy somehow. The lower the loss, the better the model!
 
 ```jldoctest linear_regression_complex; filter = r"[+-]?([0-9]*[.])?[0-9]+(f[+-]*[0-9])?"
-julia> function loss(model, x, y)
-           ŷ = model(x)
-           Flux.mse(ŷ, y)
+julia> function loss(model, features, labels)
+           ŷ = model(features)  # predictions for `features`
+           Flux.mse(ŷ, labels)  # mean squared error between predictions and `labels`
        end;
 
 julia> loss(model, x_train_n, y_train)
@@ -330,8 +330,8 @@ We can now proceed to the training phase!
 The training procedure would make use of the same mathematics, but now we can pass in the model inside the `gradient` call and let `Flux` and `Zygote` handle the derivatives!
 
 ```jldoctest linear_regression_complex
-julia> function train_model!(f_loss, model, X, y)
-           dLdm, _, _ = gradient(f_loss, model, X, y)
+julia> function train_model!(f_loss, model, features, labels)
+           dLdm, _, _ = gradient(f_loss, model, features, labels)  # keep only the gradient for `model`
            @. model.weight = model.weight - 0.000001 * dLdm.weight
            @. model.bias = model.bias - 0.000001 * dLdm.bias
        end;
diff --git a/docs/src/tutorials/logistic_regression.md b/docs/src/tutorials/logistic_regression.md
index 56f5cab1dd..51302a5cbf 100644
--- a/docs/src/tutorials/logistic_regression.md
+++ b/docs/src/tutorials/logistic_regression.md
@@ -9,7 +9,7 @@ julia> using Flux, Statistics, MLDatasets, DataFrames, OneHotArrays
 ```
 
 ## Dataset
-Let's start by importing a dataset from MLDatasets.jl. We will use the `Iris` dataset that contains the data of three different `Iris` species. The data consists of 150 data points (`x`s), each having four features. Each of these `x` is mapped to `y`, the name of a particular `Iris` species. The following code will download the `Iris` dataset when run for the first time.
+Let's start by importing a dataset from MLDatasets.jl. We will use the `Iris` dataset that contains the data of three different `Iris` species. The data consists of 150 data points (`x`s), each having four features. Each of these `x`s is mapped to a label (or target) `y`, the name of a particular `Iris` species. The following code will download the `Iris` dataset when run for the first time.
 
 ```jldoctest logistic_regression
 julia> Iris()
@@ -170,9 +170,9 @@ julia> custom_logitcrossentropy(ŷ, y) = mean(.-sum(y .* logsoftmax(ŷ; dims = 1
 Now we can wrap the `custom_logitcrossentropy` inside a function that takes in the model parameters, `x`s, and `y`s, and returns the loss value.
 
 ```jldoctest logistic_regression; filter = r"[+-]?([0-9]*[.])?[0-9]+(f[+-]*[0-9])?"
-julia> function custom_loss(W, b, x, y)
-           ŷ = custom_model(W, b, x)
-           custom_logitcrossentropy(ŷ, y)
+julia> function custom_loss(weights, biases, features, labels_onehot)
+           ŷ = custom_model(weights, biases, features)  # model output for `features`
+           custom_logitcrossentropy(ŷ, labels_onehot)  # loss against the one-hot labels
        end;
 
 julia> custom_loss(W, b, x, custom_y_onehot)
@@ -184,9 +184,9 @@ The loss function works!
 Flux provides us with many minimal yet elegant loss functions. In fact, the `custom_logitcrossentropy` defined above has been taken directly from Flux. The functions present in Flux includes sanity checks, ensures efficient performance, and behaves well with the overall FluxML ecosystem.
 
 ```jldoctest logistic_regression; filter = r"[+-]?([0-9]*[.])?[0-9]+(f[+-]*[0-9])?"
-julia> function flux_loss(flux_model, x, y)
-           ŷ = flux_model(x)
-           Flux.logitcrossentropy(ŷ, y)
+julia> function flux_loss(flux_model, features, labels_onehot)
+           ŷ = flux_model(features)  # model output for `features`
+           Flux.logitcrossentropy(ŷ, labels_onehot)  # Flux's counterpart of `custom_logitcrossentropy`
        end;
 
 julia> flux_loss(flux_model, x, flux_y_onehot)
@@ -214,8 +214,8 @@ julia> max_idx = [x[1] for x in argmax(custom_y_onehot; dims=1)]
 Now we can write a function that calculates the indices of the maximum element in each column, and maps them to a class name.
 
 ```jldoctest logistic_regression
-julia> function custom_onecold(y)
-           max_idx = [x[1] for x in argmax(y; dims=1)]
+julia> function custom_onecold(labels_onehot)
+           max_idx = [x[1] for x in argmax(labels_onehot; dims=1)]  # row index of each column's maximum
            return vec(classes[max_idx])
        end;
 
@@ -313,8 +313,8 @@ julia> custom_loss(W, b, x, custom_y_onehot)
 The loss went down! Let's plug our super training logic inside a function.
 
 ```jldoctest logistic_regression
-julia> function train_custom_model!(f_loss, weights, biases, X, y)
-           dLdW, dLdb, _, _ = gradient(f_loss, weights, biases, X, y)
+julia> function train_custom_model!(f_loss, weights, biases, features, labels_onehot)
+           dLdW, dLdb, _, _ = gradient(f_loss, weights, biases, features, labels_onehot)  # data gradients are discarded
            weights .= weights .- 0.1 .* dLdW
            biases .= biases .- 0.1 .* dLdb
        end;
@@ -347,8 +347,8 @@ We can write a similar-looking training loop for our `flux_model` and train it s
 julia> flux_loss(flux_model, x, flux_y_onehot)
 1.215731131385928
 
-julia> function train_flux_model!(f_loss, model, X, y)
-           dLdm, _, _ = gradient(f_loss, model, X, y)
+julia> function train_flux_model!(f_loss, model, features, labels_onehot)
+           dLdm, _, _ = gradient(f_loss, model, features, labels_onehot)  # keep only the gradient for `model`
            @. model[1].weight = model[1].weight - 0.1 * dLdm[:layers][1][:weight]
            @. model[1].bias = model[1].bias - 0.1 * dLdm[:layers][1][:bias]
        end;