diff --git a/regression/README.md b/regression/README.md
index f1f9067a30..c99e7237b4 100644
--- a/regression/README.md
+++ b/regression/README.md
@@ -1,3 +1,8 @@
 # Linear regression example
 
 Trains a single fully-connected layer to fit a 4th degree polynomial.
+
+```bash
+pip install -r requirements.txt
+python main.py
+```
diff --git a/regression/main.py b/regression/main.py
index cb41ad70ec..dcbd233e4b 100755
--- a/regression/main.py
+++ b/regression/main.py
@@ -1,23 +1,47 @@
-#!/usr/bin/env python
-from __future__ import print_function
-from itertools import count
-
+import argparse
 import torch
 import torch.nn.functional as F
+import torch.optim as optim
+from torch.optim.lr_scheduler import StepLR
 
+# Polynomial degree and target weights/bias
 POLY_DEGREE = 4
 W_target = torch.randn(POLY_DEGREE, 1) * 5
 b_target = torch.randn(1) * 5
 
 
+def parse_args():
+    """Command line arguments"""
+    parser = argparse.ArgumentParser(description='Polynomial Regression Example')
+    parser.add_argument('--batch-size', type=int, default=32, metavar='N',
+                        help='input batch size for training (default: 32)')
+    parser.add_argument('--epochs', type=int, default=100, metavar='N',
+                        help='number of epochs to train (default: 100)')
+    parser.add_argument('--lr', type=float, default=0.1, metavar='LR',
+                        help='learning rate (default: 0.1)')
+    parser.add_argument('--gamma', type=float, default=0.7, metavar='M',
+                        help='Learning rate step gamma (default: 0.7)')
+    parser.add_argument('--no-cuda', action='store_true', default=False,
+                        help='disables CUDA training')
+    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
+                        help='how many batches to wait before logging training status')
+    parser.add_argument('--save-model', action='store_true', default=False,
+                        help='For saving the current model')
+    parser.add_argument('--dry-run', action='store_true', default=False,
+                        help='quickly check a single pass')
+    parser.add_argument('--seed', type=int, default=1, metavar='S',
+                        help='random seed (default: 1)')
+    return parser.parse_args()
+
+
 def make_features(x):
     """Builds features i.e. a matrix with columns [x, x^2, x^3, x^4]."""
     x = x.unsqueeze(1)
-    return torch.cat([x ** i for i in range(1, POLY_DEGREE+1)], 1)
+    return torch.cat([x ** i for i in range(1, POLY_DEGREE + 1)], 1)
 
 
 def f(x):
-    """Approximated function."""
+    """Approximated function.
+    f(x) = W_target * x + b_target"""
     return x.mm(W_target) + b_target.item()
@@ -38,31 +62,86 @@ def get_batch(batch_size=32):
     return x, y
 
 
-# Define model
-fc = torch.nn.Linear(W_target.size(0), 1)
+class PolyRegressor(torch.nn.Module):
+    """A simple linear regression model over the polynomial features."""
+    def __init__(self):
+        super(PolyRegressor, self).__init__()
+        self.fc = torch.nn.Linear(POLY_DEGREE, 1)
+
+    def forward(self, x):
+        return self.fc(x)
+
+
+def train(args, model, device, optimizer, epoch, log_interval=10):
+    """Training loop"""
+    model.train()
+    for batch_idx in range(1, args.epochs + 1):
+        # Get a batch of data
+        batch_x, batch_y = get_batch(args.batch_size)
+        batch_x, batch_y = batch_x.to(device), batch_y.to(device)
+
+        # Reset gradients
+        optimizer.zero_grad()
+
+        # Forward pass
+        output = model(batch_x)
+        loss = F.smooth_l1_loss(output, batch_y)
+
+        # Backward pass
+        loss.backward()
+
+        # Apply gradients
+        optimizer.step()
+
+        if batch_idx % log_interval == 0:
+            print(f'Epoch {epoch} Batch {batch_idx}/{args.epochs} Loss: {loss.item():.6f}')
+
+        # Dry run for a quick check
+        if args.dry_run:
+            break
+
+
+def test(model, device):
+    """Test function (here used to print the learned vs. actual function)."""
+    model.eval()
+    model.to(device)
+    with torch.no_grad():
+        print('==> Learned function:')
+        print(poly_desc(model.fc.weight.view(-1), model.fc.bias))
+        print('==> Actual function:')
+        print(poly_desc(W_target.view(-1), b_target))
+
+
+def main():
+    args = parse_args()
+
+    # Set the random seed
+    torch.manual_seed(args.seed)
+
+    # Select the device (GPU/CPU)
+    use_cuda = not args.no_cuda and torch.cuda.is_available()
+    device = torch.device("cuda" if use_cuda else "cpu")
 
-for batch_idx in count(1):
-    # Get data
-    batch_x, batch_y = get_batch()
+    # Initialize the model, optimizer and scheduler
+    model = PolyRegressor().to(device)
+    optimizer = optim.SGD(model.parameters(), lr=args.lr)
+    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
 
-    # Reset gradients
-    fc.zero_grad()
+    # Training loop
+    for epoch in range(1, args.epochs + 1):
+        train(args, model, device, optimizer, epoch, args.log_interval)
+        scheduler.step()
 
-    # Forward pass
-    output = F.smooth_l1_loss(fc(batch_x), batch_y)
-    loss = output.item()
+    # Print the learned vs. actual function after training
+    test(model, device)
 
-    # Backward pass
-    output.backward()
+    if args.save_model:
+        torch.save(model.state_dict(), "polynomial_regressor.pt")
 
-    # Apply gradients
-    for param in fc.parameters():
-        param.data.add_(-0.1 * param.grad)
+    print("Training complete.")
+    if args.save_model:
+        print("Model saved to polynomial_regressor.pt")
 
-    # Stop criterion
-    if loss < 1e-3:
-        break
-
-print('Loss: {:.6f} after {} batches'.format(loss, batch_idx))
-print('==> Learned function:\t' + poly_desc(fc.weight.view(-1), fc.bias))
-print('==> Actual function:\t' + poly_desc(W_target.view(-1), b_target))
+
+if __name__ == '__main__':
+    main()
diff --git a/regression/requirements.txt b/regression/requirements.txt
new file mode 100644
index 0000000000..6cec7414dc
--- /dev/null
+++ b/regression/requirements.txt
@@ -0,0 +1,2 @@
+torch
+torchvision==0.20.0
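As a quick sanity check of the refactored CLI, the commands below drive `main.py` with the flags defined in `parse_args()`. This is only an illustrative sketch: the flag values are arbitrary choices, not recommendations, and `polynomial_regressor.pt` is the output path used by `--save-model` in this diff.

```bash
# Install the pinned dependencies, then run a short training pass and save the model
pip install -r requirements.txt
python main.py --epochs 20 --lr 0.1 --gamma 0.7 --log-interval 5 --save-model

# Minimal smoke test: --dry-run stops after one batch per epoch, on CPU
python main.py --dry-run --no-cuda
```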