# build2.py

import sklearn as skl
import mdp
import yaml
import random
import re
import os
import tempfile
import ssl
import cv2
import numpy as np
from matplotlib import pyplot as plt
import argparse
import time
import skops.io as sio
import timeit
import pickle

#parser = argparse.ArgumentParser()
#parser.add_argument('--config', type=str, required=True, help='Path to config file')
#args = parser.parse_args()

#with open(args.config, 'r') as f:
#    config = yaml.safe_load(f)

# Experiment name; it is used both as the directory and as the YAML file stem.
config_name = "config1"

# Temporary hard-coded config load (stands in for the commented-out --config option above).
with open(f"{config_name}/{config_name}.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Echo the whole configuration, then the input-shape entry, as a quick sanity check.
print(config)
print(config['input_shape'])


def load_images_in_batches(folder, batch_size, image_shape=(40, 320, 3)):
    """Yield the images stored in ``folder`` as consecutive NumPy batches.

    File names must look like ``<prefix>_<number>.<ext>``: they are ordered
    numerically by the text between the first underscore and the following
    dot. Each file is read with OpenCV and converted from BGR to RGB.

    Args:
        folder: Directory containing the image files.
        batch_size: Number of images per yielded batch; must be positive.
        image_shape: Shape every decoded image must have, in the layout
            returned by ``cv2.imread``. Defaults to ``(40, 320, 3)``.

    Yields:
        Float arrays of shape ``(k, *image_shape)``. ``k`` equals
        ``batch_size`` for every batch except possibly the last one, which
        holds the remaining images. Nothing is yielded for an empty folder.

    Raises:
        ValueError: If ``batch_size`` is not positive.
        OSError: If a file cannot be decoded as an image.
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be positive, got {batch_size}")

    # Numeric (not lexicographic) ordering, so that e.g. "_10" follows "_9".
    filenames = sorted(
        os.listdir(folder),
        key=lambda name: float(name.split('_')[1].split('.')[0]),
    )

    # Size the buffer from the argument, not from the global config.
    batch_images = np.empty((batch_size, *image_shape))
    filled = 0  # number of valid images currently in batch_images

    for filename in filenames:
        file_path = os.path.join(folder, filename)

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(file_path)
        if img is None:
            raise OSError(f"Could not read image file: {file_path}")

        # OpenCV loads images in BGR order; store them as RGB.
        batch_images[filled] = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        filled += 1

        # Hand out a full batch and start a fresh buffer so the caller's
        # array is never overwritten by later images.
        if filled == batch_size:
            yield batch_images
            batch_images = np.empty((batch_size, *image_shape))
            filled = 0

    # Only yield a trailing batch when it actually contains images; otherwise
    # an uninitialised buffer would be handed to the caller.
    if filled:
        yield batch_images[:filled]


# Usage: build the batch generator. It is lazy, so no image is read from disk
# until the training loop starts iterating over it.
image_batches = load_images_in_batches(
    folder=config["data_path"],
    batch_size=config["batch_size"],
)

# Progress marker for the console.
print("Data loaded")


#Standardising values: done per batch inside the training loop (subtract the batch mean, divide by the batch std)


#LAYER 1

#Receptive-field layout of the first layer. The geometry comes from entry 0 of
#the config lists; every input channel carries 3 values (the RGB components).
switchboard1 = mdp.hinet.Rectangular2dSwitchboard(
    in_channels_xy=tuple(config["input_shape"][0]),
    field_channels_xy=config["field_size"][0],
    field_spacing_xy=config["stride"][0],
    in_channel_dim=3,
)
print(switchboard1.out_channel_dim)

#Nodes applied to each receptive field, listed in execution order
sfa1_node = mdp.nodes.SFANode(
    input_dim=switchboard1.out_channel_dim,
    output_dim=32,
)  #dimensionality-reducing SFA: one receptive field -> 32 outputs
node1_2 = mdp.nodes.QuadraticExpansionNode()  #quadratic expansion
node1_3 = mdp.nodes.NormalNoiseNode(
    noise_args=(0, config["noise"]),
)  #additive normal noise, parameters (0, config["noise"])
node1_4 = mdp.nodes.SFANode(output_dim=32)  #second SFA, 32 outputs
node1_5 = mdp.nodes.CutoffNode(lower_bound=-4, upper_bound=4)  #clip to [-4, 4]

#Chain the nodes, then wrap the chain so it can be nested as a single node
flow1 = mdp.Flow([sfa1_node, node1_2, node1_3, node1_4, node1_5])
layer1 = mdp.hinet.FlowNode(flow1)

#Apply the wrapped flow to every receptive field of the switchboard
layout1 = mdp.hinet.CloneLayer(
    node=layer1,
    n_nodes=switchboard1.output_channels,
)

#Complete first layer as a feed-forward flow: routing, then per-field processing
dim_red1 = mdp.Flow([switchboard1, layout1])

#LAYER 2

#Receptive-field layout of the second layer (entry 1 of the config lists).
#Each input channel carries 32 values, matching the 32 outputs that layer 1
#produces per receptive field.
switchboard2 = mdp.hinet.Rectangular2dSwitchboard(
    in_channels_xy=tuple(config["input_shape"][1]),
    field_channels_xy=config["field_size"][1],
    field_spacing_xy=config["stride"][1],
    in_channel_dim=32,
)

#Per-field processing chain, in execution order
sfa2_node = mdp.nodes.SFANode(
    input_dim=switchboard2.out_channel_dim,
    output_dim=32,
)  #dimensionality-reducing SFA: one receptive field -> 32 outputs
node2_2 = mdp.nodes.QuadraticExpansionNode()  #quadratic expansion
node2_3 = mdp.nodes.NormalNoiseNode(
    noise_args=(0, config["noise"]),
)  #additive normal noise, parameters (0, config["noise"])
node2_4 = mdp.nodes.SFANode(output_dim=32)  #second SFA, 32 outputs
node2_5 = mdp.nodes.CutoffNode(lower_bound=-4, upper_bound=4)  #clip to [-4, 4]

#Assemble the chain and make it nestable as one node
flow2 = mdp.Flow([sfa2_node, node2_2, node2_3, node2_4, node2_5])
layer2 = mdp.hinet.FlowNode(flow2)

#Apply the wrapped flow to every receptive field of the switchboard
layout2 = mdp.hinet.CloneLayer(
    node=layer2,
    n_nodes=switchboard2.output_channels,
)

#Complete second layer as a feed-forward flow
dim_red2 = mdp.Flow([switchboard2, layout2])

#LAYER 3

#Receptive-field layout of the third layer (entry 2 of the config lists).
#Each input channel carries 32 values, matching the 32 outputs that layer 2
#produces per receptive field.
switchboard3 = mdp.hinet.Rectangular2dSwitchboard(
    in_channels_xy=tuple(config["input_shape"][2]),
    field_channels_xy=config["field_size"][2],
    field_spacing_xy=config["stride"][2],
    in_channel_dim=32,
)

#Per-field processing chain, in execution order
#todo check if the rank-deficit method (PCA) is needed for the first SFA node
sfa3_node = mdp.nodes.SFANode(
    input_dim=switchboard3.out_channel_dim,
    output_dim=32,
)  #dimensionality-reducing SFA: one receptive field -> 32 outputs
node3_2 = mdp.nodes.QuadraticExpansionNode()  #quadratic expansion
node3_3 = mdp.nodes.NormalNoiseNode(
    noise_args=(0, config["noise"]),
)  #additive normal noise, parameters (0, config["noise"])
#todo (add rank-deficit method to the second SFA node?)
node3_4 = mdp.nodes.SFANode(output_dim=32)  #second SFA, 32 outputs
node3_5 = mdp.nodes.CutoffNode(lower_bound=-4, upper_bound=4)  #clip to [-4, 4]

#Assemble the chain and make it nestable as one node
flow3 = mdp.Flow([sfa3_node, node3_2, node3_3, node3_4, node3_5])
layer3 = mdp.hinet.FlowNode(flow3)

#Apply the wrapped flow to every receptive field of the switchboard
layout3 = mdp.hinet.CloneLayer(
    node=layer3,
    n_nodes=switchboard3.output_channels,
)

#Complete third layer as a feed-forward flow
dim_red3 = mdp.Flow([switchboard3, layout3])


#LAYER 4 (ICA)
# NOTE(review): input_dim=32 assumes layer 3 ends in a single receptive field
# (32 outputs in total) -- confirm against the config.
layer4 = mdp.nodes.CuBICANode(input_dim=32)

print("Model Ready")
n = 0
# Layers in feed-forward order; each one is pickled after every batch.
network = [dim_red1, dim_red2, dim_red3, layer4]

# NOTE(review): every iteration calls train() again on the same flows/nodes.
# MDP nodes normally refuse further training once their training phase has
# been closed (executing a node closes it) -- confirm that multi-batch
# training behaves as intended here.
for batch in image_batches:
    # makedirs also creates the parent "layers" directory and tolerates reruns.
    os.makedirs(f"{config_name}/layers/batch{n}", exist_ok=True)

    # The last batch may hold fewer images than config["batch_size"], so take
    # the row count from the batch itself.
    n_images = len(batch)

    # Flattening every image into one row
    batch = batch.reshape(n_images, -1)
    # Scaling data to zero mean / unit std over the whole batch
    batch = (batch - batch.mean()) / batch.std()

    st = time.time()
    #TRAINING
    # Train the complete first layer (switchboard + cloned flow), exactly as
    # for layers 2 and 3. The bare flow1 expects a single receptive field as
    # input, not a whole flattened image.
    dim_red1.train(batch)
    dat2 = dim_red1(batch)
    print("Layer 1 trained...")

    #training second layer
    dim_red2.train(dat2)
    dat3 = dim_red2(dat2)
    print("Layer 2 trained...")

    #training 3rd layer
    dim_red3.train(dat3)
    dat4 = dim_red3(dat3)
    print("Layer 3 trained...")

    #training ICA
    layer4.train(dat4)
    et = time.time()
    print(f"Batch {n} processed in {round(et -st,2)} seconds")
    print(f"Per image time = {round(((et-st)/n_images)*1000,2)} ms")

    # Save model layers one by one. The file name embeds str(layer) followed
    # by the layer's position in the network.
    for j, layer in enumerate(network):
        with open(f"{config_name}/layers/batch{n}/{layer}{j}.pic", "wb") as f:
            pickle.dump(layer, f)
    n = n + 1