Visualization, Python & R, Machine Learning, Deep Learning, Reinforcement Learning
First principle
Find the underlying probability distribution in a large amount of data, then make predictions from that same distribution: y = f(x).

Deep learning works like learning a function relationship in reverse, a kind of reverse engineering: you only know that the data follows some pattern, and you infer the original function that generated it. The calculator neural network below is an example of this.

The high-dimensional view: data such as source code is embedded into a high-dimensional space, a fine-grained classifier separates it there, and search happens in the same space. Code, for instance, can be parsed with tree-sitter and used as training data so the model picks up its logical structure. Most of NLP boils down to multi-class classification in a high-dimensional space.

Collect the inputs x and outputs y around you as training data and mine their mapping f(x) whenever you can. You can use GPT to generate data for your training needs, or write a crawler to gather the data you need.
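The first code block below fits a straight line to a few points with ordinary least squares. For reference, the closed-form Normal Equation it implements is theta = (X^T X)^(-1) X^T y, where X carries a leading column of ones for the intercept term; the sections after it redo line fitting with a small neural network trained by gradient descent.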
import numpy as np
import matplotlib.pyplot as plt

# Example data points
X = np.array([1, 2.2, 3, 4, 5])
y = np.array([2, 4, 6.3, 8, 11])

# Add a column of ones to X for the intercept term (bias)
X_b = np.c_[np.ones((X.shape[0], 1)), X]  # X_b is X with a bias column

# Calculate the best-fit line parameters using the Normal Equation
theta_best = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)

# Print the parameters (intercept and slope)
print(f"Intercept: {theta_best[0]}")
print(f"Slope: {theta_best[1]}")

# Predict values using the model
y_pred = X_b.dot(theta_best)

# Plot the data points and the best fit line
plt.scatter(X, y, color='blue', label='Data points')
plt.plot(X, y_pred, color='red', label='Best fit line')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
least squares method by neural network
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt

# Fit a line with torch.optim.Adam and plot how it works

# Define a simple linear model
class LinearModel(nn.Module):
    def __init__(self):
        super(LinearModel, self).__init__()
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        return self.linear(x)

# Initialize the model, loss function, and optimizer
model = LinearModel()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Generate some synthetic data (y = 2x + 1 with some noise)
x_train = torch.linspace(-1, 1, 100).reshape(-1, 1)
y_train = 2 * x_train + 1 + 0.2 * torch.randn(x_train.size())

# List to store the loss values
loss_values = []

# Training loop
for epoch in range(1000):
    model.train()
    optimizer.zero_grad()
    outputs = model(x_train)
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()
    loss_values.append(loss.item())
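The snippet above collects loss_values but stops before the plot its opening comment promises. A minimal sketch of that plotting step, assuming the x_train, y_train, model, and loss_values defined above:

# Sketch (not in the original): plot the Adam loss curve and the fitted line
model.eval()
with torch.no_grad():
    y_fit = model(x_train)

fig, axes = plt.subplots(1, 2, figsize=(10, 4))
axes[0].plot(loss_values)
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('MSE loss')
axes[0].set_title('Training loss')
axes[1].scatter(x_train.numpy(), y_train.numpy(), s=8, label='Data')
axes[1].plot(x_train.numpy(), y_fit.numpy(), color='red', label='Fitted line')
axes[1].legend()
plt.show()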
nonlinear fitting
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Generate a 100-length random sequence
n = 100
x = torch.linspace(1, 10, n).unsqueeze(1)
y = torch.sin(x) + torch.rand(n, 1) * 0.5

# Step 2: Define a simple neural network model for nonlinear fitting
class NonlinearModel(nn.Module):
    def __init__(self):
        super(NonlinearModel, self).__init__()
        self.fc1 = nn.Linear(1, 10)
        self.fc2 = nn.Linear(10, 10)
        self.fc3 = nn.Linear(10, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = NonlinearModel()

# Step 3: Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Step 4: Train the model
epochs = 1000
for epoch in range(epochs):
    model.train()
    # Forward pass
    outputs = model(x)
    loss = criterion(outputs, y)
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

# Step 5: Plot the original data and the fitted curve
model.eval()
with torch.no_grad():
    predicted = model(x).numpy()

plt.figure(figsize=(10, 5))
plt.plot(x.numpy(), y.numpy(), 'ro', label='Original data')
plt.plot(x.numpy(), predicted, 'b-', label='Fitted curve')
plt.legend()
plt.show()
polar coordinate classification
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Helper function to convert Cartesian to polar (spherical) coordinates
def cartesian_to_polar(x, y, z):
    r = torch.sqrt(x**2 + y**2 + z**2)
    theta = torch.atan2(y, x)
    phi = torch.acos(z / r)
    return r, theta, phi

# Example data generation (replace with your actual data)
n_samples = 5000
x = torch.randn(n_samples)
y = torch.randn(n_samples)
z = torch.randn(n_samples)
labels = torch.randint(0, 4, (n_samples,))  # Four classes (0, 1, 2, 3)

# Convert to polar coordinates
r, theta, phi = cartesian_to_polar(x, y, z)

# Combine into a single tensor
data = torch.stack((r, theta, phi), dim=1)

# Create a Dataset and DataLoader
dataset = TensorDataset(data, labels)
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

# Define a simple feedforward neural network
class PolarNet(nn.Module):
    def __init__(self):
        super(PolarNet, self).__init__()
        self.fc1 = nn.Linear(3, 64)
        self.fc2 = nn.Linear(64, 128)
        self.fc3 = nn.Linear(128, 4)  # Four output classes

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model, loss function, and optimizer
model = PolarNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(20):  # Number of epochs
    for inputs, targets in train_loader:
        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/20, Loss: {loss.item()}')

# After training, evaluate the model on the entire dataset for visualization
with torch.no_grad():
    predicted_labels = model(data).argmax(dim=1)

# Plot the results in 3D
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')

# Convert polar back to Cartesian for plotting
x_cartesian = r * torch.sin(phi) * torch.cos(theta)
y_cartesian = r * torch.sin(phi) * torch.sin(theta)
z_cartesian = r * torch.cos(phi)

# Plot the 3D scatter plot
scatter = ax.scatter(x_cartesian, y_cartesian, z_cartesian, c=predicted_labels, cmap='viridis', marker='o')

# Add a color bar and labels
plt.colorbar(scatter, ax=ax)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
plt.title('3D Visualization of PolarNet Classifications')
plt.show()
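The next snippet runs inference with a previously trained MNIST classifier; the excerpt starts at step 3 (loading weights), so the network definition and training from the earlier steps are assumed. A minimal sketch of a Net that would match the usage below (28x28 grayscale input, 10 output classes) is given here; the real architecture behind mnist_model.pth may differ.

import torch
import torch.nn as nn
import torch.nn.functional as F

# Assumed architecture (sketch): a small fully connected MNIST classifier.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)   # Flatten the 28x28 image
        x = F.relu(self.fc1(x))
        return self.fc2(x)          # Raw logits for digits 0-9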
from torchvision import transforms
from PIL import Image

model = Net()

# 3. Load the trained model weights
model.load_state_dict(torch.load("mnist_model.pth"))
model.eval()  # Set the model to evaluation mode

# 4. Prepare the handwritten input image
# Preprocess the handwritten image to match the MNIST format (28x28 pixels, grayscale).
def preprocess_image(image_path):
    transform = transforms.Compose([
        transforms.Grayscale(),                      # Ensure the image is grayscale
        transforms.Resize((28, 28)),                 # Resize to 28x28 pixels
        transforms.ToTensor(),                       # Convert to a tensor
        transforms.Normalize((0.1307,), (0.3081,))   # Normalize with the same mean and std as MNIST
    ])
    image = Image.open(image_path)
    image = transform(image).unsqueeze(0)  # Add batch dimension
    return image

# 5. Perform inference
def recognize_digit(image_path):
    image = preprocess_image(image_path)
    with torch.no_grad():
        output = model(image)
        prediction = output.argmax(dim=1, keepdim=True)
    return prediction.item()

# Example usage
image_path = 'path_to_your_handwritten_digit_image3.png'
predicted_digit = recognize_digit(image_path)
print(f'Predicted Digit: {predicted_digit}')
calculator neural network
import torch
import torch.nn as nn
import torch.optim as optim
import random
import numpy as np

# Define the neural network architecture
class CalculatorNN(nn.Module):
    def __init__(self):
        super(CalculatorNN, self).__init__()
        self.fc1 = nn.Linear(3, 128)   # Input: 2 numbers + operation code
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)    # Output: the result

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model = CalculatorNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
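# Assumed data preparation (not in the original excerpt): X_train and y_train are
# used by the training loop below but never defined. One plausible encoding is
# [a, b, op] -> result, with the operation coded as 0=+, 1=-, 2=*, 3=/.
ops = {0: lambda a, b: a + b, 1: lambda a, b: a - b,
       2: lambda a, b: a * b, 3: lambda a, b: a / b}
samples, results = [], []
for _ in range(10000):
    a, b = random.uniform(1, 100), random.uniform(1, 100)
    op = random.randint(0, 3)
    samples.append([a, b, op])
    results.append([ops[op](a, b)])
X_train = torch.tensor(samples, dtype=torch.float32)
y_train = torch.tensor(results, dtype=torch.float32)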
# Training loop
num_epochs = 50000   # with only 5000 epochs the loss is still too large
for epoch in range(num_epochs):
    model.train()
    # Forward pass
    predictions = model(X_train)
    loss = criterion(predictions, y_train)
    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch + 1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
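# Assumed save step (not shown in the original): the inference code below loads
# 'calculator_model.pth', so the trained weights presumably get saved here.
torch.save(model.state_dict(), 'calculator_model.pth')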
# ---- use the trained model
model = CalculatorNN()
model.load_state_dict(torch.load('calculator_model.pth'))
model.eval()

# Perform the prediction
with torch.no_grad():
    # Prepare the input (32 * 3); operation code 2 corresponds to multiplication
    input_data = torch.tensor([[32.0, 3.0, 2]], dtype=torch.float32)
    prediction = model(input_data)
print(f'Prediction for 32 * 3: {prediction.item():.4f}')
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random

# Define a simple fully connected neural network
class DQN(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(input_dim, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, output_dim)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)

# 3. Initialize the environment and model
import gymnasium as gym

env = gym.make("LunarLander-v2", render_mode="human")
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.n

# Create the DQN model
model = DQN(input_dim=state_dim, output_dim=action_dim)

# 4. Define the training loop
# The agent interacts with the environment, collects rewards, and the model is updated.

# Parameters
learning_rate = 0.001
gamma = 0.99           # Discount factor
epsilon = 1.0          # Exploration rate
epsilon_decay = 0.995
epsilon_min = 0.01
episodes = 500

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Function to choose an action (epsilon-greedy policy)
def choose_action(state, epsilon):
    if np.random.rand() <= epsilon:
        return np.random.choice(action_dim)  # Random action
    state = torch.FloatTensor(state).unsqueeze(0)
    with torch.no_grad():
        q_values = model(state)
    return torch.argmax(q_values).item()

# Function to train the model
def train_model(memory, batch_size=64):
    if len(memory) < batch_size:
        return
    # Randomly sample a batch from memory
    batch = random.sample(memory, batch_size)
    # Extract states, actions, rewards, next_states, and dones from the batch
    states, actions, rewards, next_states, dones = zip(*batch)
    # Convert them to tensors
    states = torch.FloatTensor(np.array(states))
    actions = torch.LongTensor(actions)
    rewards = torch.FloatTensor(rewards)
    next_states = torch.FloatTensor(np.array(next_states))
    dones = torch.FloatTensor(dones)
    # Compute Q values for the current states
    q_values = model(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    # Compute the maximum Q values for the next states (no gradient through the target)
    next_q_values = model(next_states).max(1)[0].detach()
    # Compute the target Q values
    q_targets = rewards + (1 - dones) * gamma * next_q_values
    # Compute the loss
    loss = F.mse_loss(q_values, q_targets)
    # Optimize the model
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

# Main loop
memory = []
for episode in range(episodes):
    state = env.reset()[0]
    total_reward = 0
    for t in range(1000):
        action = choose_action(state, epsilon)
        next_state, reward, done, truncated, _ = env.step(action)
        memory.append((state, action, reward, next_state, done))
        train_model(memory)
        state = next_state
        total_reward += reward
        if done or truncated:
            break
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print(f"Episode {episode+1}, Total Reward: {total_reward}")

env.close()
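A quick way to check the learned policy (not part of the original code) is to run a few episodes greedily with epsilon = 0, reusing the choose_action helper defined above:

# Sketch: greedy evaluation of the trained DQN
eval_env = gym.make("LunarLander-v2", render_mode="human")
for episode in range(3):
    state = eval_env.reset()[0]
    total_reward = 0.0
    done = truncated = False
    while not (done or truncated):
        action = choose_action(state, epsilon=0.0)   # Always take the highest-Q action
        state, reward, done, truncated, _ = eval_env.step(action)
        total_reward += reward
    print(f"Eval episode {episode+1}, Total Reward: {total_reward:.1f}")
eval_env.close()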
import torch
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.animation import FuncAnimation

# Random 3D surface (loss function)
def loss_function(x, y):
    return torch.sin(x) * torch.cos(y) + 0.1 * (x**2 + y**2)

# Generate a meshgrid for plotting the surface
x = torch.linspace(-5, 5, 100)
y = torch.linspace(-5, 5, 100)
X, Y = torch.meshgrid(x, y)
Z = loss_function(X, Y).detach().numpy()

# Initialize the figure and 3D axis for the animation
fig = plt.figure(figsize=(10, 7))
ax = fig.add_subplot(111, projection='3d')
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title('SGD Optimization Path on 3D Surface')

# Plot the static 3D surface
ax.plot_surface(X.numpy(), Y.numpy(), Z, cmap='viridis', alpha=0.7)

# SGD starting point
start_point = torch.tensor([4.0, 4.0], requires_grad=True)

# Hyperparameters
learning_rate = 0.1
optimizer = torch.optim.SGD([start_point], lr=learning_rate)

# Number of steps and animation frames
steps = 10
path = np.zeros((steps, 3))

# Plot the initial point on the surface
point_plot, = ax.plot([], [], [], color='r', marker='o', markersize=5)

# Function to update the frame during animation
def update(i):
    global start_point
    optimizer.zero_grad()
    # Calculate the loss (z value)
    loss = loss_function(start_point[0], start_point[1])
    # Backpropagation to compute gradients
    loss.backward()
    # Perform an optimization step
    optimizer.step()
    # Store the (x, y, z) values
    path[i, 0] = start_point[0].item()
    path[i, 1] = start_point[1].item()
    path[i, 2] = loss.item()
    # Update the point on the surface
    point_plot.set_data(path[:i+1, 0], path[:i+1, 1])
    point_plot.set_3d_properties(path[:i+1, 2])
    return point_plot,

# Animate SGD for 10 steps
ani = FuncAnimation(fig, update, frames=steps, interval=500, blit=True)

# Show the animation
plt.show()
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

class Vocab:
    def __init__(self, stoi, itos):
        self.stoi = stoi
        self.itos = itos

# Provided corpus (AI history)
corpus = """The history of artificial intelligence (AI) began in antiquity, with myths, stories and rumors of artificial beings endowed with intelligence or consciousness by master craftsmen.... ..."""

# Simple tokenization (splitting by spaces)
corpus = corpus.replace("\n", " ")  # Remove newlines
# Tokenization can be improved using libraries like nltk or spacy, but we'll use a simple split here
tokens = corpus.split()

# Build a vocabulary from the corpus
from collections import Counter

token_counts = Counter(tokens)
vocab_stoi = {token: idx for idx, (token, count) in enumerate(token_counts.items())}
vocab_itos = {idx: token for token, idx in vocab_stoi.items()}

# Create the Vocab object
vocab = Vocab(stoi=vocab_stoi, itos=vocab_itos)

class RNNModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(RNNModel, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.rnn = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)
        out, hidden = self.rnn(x, hidden)
        out = self.fc(out)
        return out, hidden

    def init_hidden(self, batch_size):
        # Initialize hidden states (h_0) and cell states (c_0) with the correct batch size
        weight = next(self.parameters()).data
        return (weight.new_zeros(self.num_layers, batch_size, self.hidden_size),
                weight.new_zeros(self.num_layers, batch_size, self.hidden_size))

class TextDataset(Dataset):
    def __init__(self, text, vocab, sequence_length):
        self.vocab = vocab
        self.sequence_length = sequence_length
        self.data = self.tokenize_and_encode(text)

    def tokenize_and_encode(self, text):
        tokens = text.split()  # Simple tokenization (split by spaces)
        return [self.vocab.stoi[token] for token in tokens if token in self.vocab.stoi]

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.sequence_length]
        y = self.data[idx + 1:idx + 1 + self.sequence_length]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)

# Define sequence length and batch size
sequence_length = 10   # Can be tuned
batch_size = 100

# Create the dataset and dataloader
dataset = TextDataset(corpus, vocab, sequence_length)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Define model, loss function, and optimizer
vocab_size = len(vocab.stoi)
embed_size = 50     # Adjust as needed
hidden_size = 100   # Adjust as needed
num_layers = 2
num_epochs = 100    # Adjust based on performance
learning_rate = 0.001

model = RNNModel(vocab_size, embed_size, hidden_size, num_layers)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch
        batch_size = inputs.size(0)              # Actual batch size for this iteration
        hidden = model.init_hidden(batch_size)   # Initialize hidden state with the correct batch size
        outputs, hidden = model(inputs, hidden)
        loss = criterion(outputs.view(-1, vocab_size), targets.view(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

torch.save(model.state_dict(), 'rnn_model_ai.pth')

def generate_text(model, start_text, max_length=100):
    model.eval()
    hidden = model.init_hidden(1)                      # Start with batch size 1
    input = torch.tensor([[vocab.stoi[start_text]]])   # Convert start_text to an input tensor
    result = [start_text]
    for _ in range(max_length):
        output, hidden = model(input, hidden)
        prob = nn.functional.softmax(output[0, -1], dim=0).data
        next_word = torch.multinomial(prob, 1).item()
        result.append(vocab.itos[next_word])            # Convert back to a word using the vocab
        input = torch.tensor([[next_word]])              # Feed the next word as input
    return ' '.join(result)

start_text = 'AI'   # The starting word
generated_text = generate_text(model, start_text, max_length=100)
print(generated_text)
Seq2seq number translator
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import matplotlib.pyplot as plt

class NumeralTranslationDataset:
    def __init__(self):
        # Comprehensive mapping of Arabic numerals to English words
        self.num_to_words = {
            '0': 'zero', '1': 'one', '2': 'two', '3': 'three', '4': 'four',
            '5': 'five', '6': 'six', '7': 'seven', '8': 'eight', '9': 'nine',
            '10': 'ten', '11': 'eleven', '12': 'twelve', '13': 'thirteen',
            '14': 'fourteen', '15': 'fifteen', '16': 'sixteen',
            '17': 'seventeen', '18': 'eighteen', '19': 'nineteen',
            '20': 'twenty', '21': 'twenty one', '22': 'twenty two',
            '23': 'twenty three', '24': 'twenty four', '25': 'twenty five',
            '30': 'thirty', '31': 'thirty one', '32': 'thirty two',
            '33': 'thirty three', '34': 'thirty four', '35': 'thirty five',
            '40': 'forty', '41': 'forty one', '42': 'forty two',
            '43': 'forty three', '44': 'forty four', '45': 'forty five',
            '50': 'fifty', '51': 'fifty one', '52': 'fifty two',
            '53': 'fifty three', '54': 'fifty four', '55': 'fifty five',
            '60': 'sixty', '61': 'sixty one', '62': 'sixty two',
            '63': 'sixty three', '64': 'sixty four', '65': 'sixty five',
            '70': 'seventy', '71': 'seventy one', '72': 'seventy two',
            '73': 'seventy three', '74': 'seventy four', '75': 'seventy five',
            '80': 'eighty', '81': 'eighty one', '82': 'eighty two',
            '83': 'eighty three', '84': 'eighty four', '85': 'eighty five',
            '90': 'ninety', '91': 'ninety one', '92': 'ninety two',
            '93': 'ninety three', '94': 'ninety four', '95': 'ninety five'
        }

    def generate_training_data(self, num_examples=1000):
        """Generate random training data for number translation."""
        input_sequences = []
        target_sequences = []
        # Generate `num_examples` random numbers from 0 to 99
        for _ in range(num_examples):
            num = random.randint(0, 99)   # Randomly pick a number from 0 to 99
            num_str = str(num)
            # Translate to words
            if num_str in self.num_to_words:
                word = self.num_to_words[num_str]
            elif num < 20:
                # Handle teens
                units = str(num % 10)
                word = self.num_to_words[units]
            else:
                # Handle 21-99 by combining tens and units words
                tens = str((num // 10) * 10)
                units = str(num % 10)
                tens_word = self.num_to_words[tens]
                units_word = self.num_to_words[units] if units != '0' else ''
                word = f"{tens_word} {units_word}".strip()
            input_sequences.append(list(num_str))
            target_sequences.append(list(word))
        return input_sequences, target_sequences

class Encoder(nn.Module):
    def __init__(self, input_size, embedding_dim, hidden_dim):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_size, embedding_dim)
        self.gru = nn.GRU(embedding_dim, hidden_dim, batch_first=True, num_layers=2, dropout=0.2)

    def forward(self, x):
        embedded = self.embedding(x)
        outputs, hidden = self.gru(embedded)
        return outputs, hidden

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
        self.v = nn.Parameter(torch.rand(hidden_dim))

    def forward(self, hidden, encoder_outputs):
        # hidden = [batch size, hidden dim]
        # encoder_outputs = [batch size, seq len, hidden dim]
        batch_size = encoder_outputs.shape[0]
        src_len = encoder_outputs.shape[1]
        # Repeat the hidden state src_len times
        hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
        # Concatenate the hidden state with the encoder outputs
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=-1)))
        # Compute attention scores
        attention = torch.sum(self.v * energy, dim=-1)
        return torch.softmax(attention, dim=1)

class Decoder(nn.Module):
    def __init__(self, output_size, embedding_dim, hidden_dim):
        super(Decoder, self).__init__()
        self.embedding = nn.Embedding(output_size, embedding_dim)
        self.attention = Attention(hidden_dim)
        self.gru = nn.GRU(embedding_dim + hidden_dim, hidden_dim, batch_first=True, num_layers=2, dropout=0.2)
        self.fc_out = nn.Linear(hidden_dim, output_size)

    def forward(self, input, hidden, encoder_outputs):
        # input = [batch size, 1]
        # hidden = [num layers, batch size, hidden dim]
        # encoder_outputs = [batch size, src len, hidden dim]
        # Embed the input
        embedded = self.embedding(input)
        # Compute attention weights from the top hidden layer
        a = self.attention(hidden[-1], encoder_outputs)
        # Apply attention to the encoder outputs
        attended = torch.bmm(a.unsqueeze(1), encoder_outputs).squeeze(1)
        # Concatenate the embedded input with the attended context
        rnn_input = torch.cat((embedded.squeeze(1), attended), dim=1).unsqueeze(1)
        # GRU step
        output, hidden = self.gru(rnn_input, hidden)
        # Prediction
        prediction = self.fc_out(output.squeeze(1))
        return prediction, hidden, a

class Seq2SeqTranslator(nn.Module):
    def __init__(self, input_size, output_size, embedding_dim, hidden_dim):
        super(Seq2SeqTranslator, self).__init__()
        self.encoder = Encoder(input_size, embedding_dim, hidden_dim)
        self.decoder = Decoder(output_size, embedding_dim, hidden_dim)

    def forward(self, input_seq, target_seq, teacher_forcing_ratio=0.5):
        batch_size = input_seq.size(0)
        target_len = target_seq.size(1)
        target_vocab_size = self.decoder.fc_out.out_features
        # Tensor to store decoder outputs
        outputs = torch.zeros(batch_size, target_len, target_vocab_size)
        # Encoder
        encoder_outputs, hidden = self.encoder(input_seq)
        # First decoder input
        decoder_input = torch.zeros(batch_size, 1, dtype=torch.long)
        # Decode step by step
        for t in range(target_len):
            decoder_output, hidden, _ = self.decoder(decoder_input, hidden, encoder_outputs)
            outputs[:, t:t+1, :] = decoder_output.unsqueeze(1)
            # Teacher forcing
            teacher_force = random.random() < teacher_forcing_ratio
            top1 = decoder_output.argmax(1)
            if teacher_force:
                decoder_input = target_seq[:, t:t+1]
            else:
                decoder_input = top1.unsqueeze(1)
        return outputs

class NumeralTranslator:
    def __init__(self, input_chars, output_chars):
        # Create the dataset
        self.dataset = NumeralTranslationDataset()
        # Create character-to-index mappings
        self.input_char_to_idx = {char: i for i, char in enumerate(input_chars)}
        self.input_idx_to_char = {i: char for char, i in self.input_char_to_idx.items()}
        self.output_char_to_idx = {char: i for i, char in enumerate(output_chars)}
        self.output_idx_to_char = {i: char for char, i in self.output_char_to_idx.items()}
        # Hyperparameters
        self.embedding_dim = 128
        self.hidden_dim = 256
        # Initialize the model
        self.model = Seq2SeqTranslator(
            input_size=len(input_chars),
            output_size=len(output_chars),
            embedding_dim=self.embedding_dim,
            hidden_dim=self.hidden_dim
        )
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)

    def prepare_sequence(self, seq, char_to_idx):
        """Convert a sequence of characters to a tensor of indices."""
        return torch.tensor([char_to_idx.get(char, 0) for char in seq], dtype=torch.long)

    def pad_sequences(self, sequences, pad_token):
        """Pad sequences to equal length."""
        # Convert sequences to lists if they are tensors
        sequences = [seq.tolist() if torch.is_tensor(seq) else seq for seq in sequences]
        max_len = max(len(seq) for seq in sequences)
        padded = []
        for seq in sequences:
            padded.append(seq + [pad_token] * (max_len - len(seq)))
        return torch.tensor(padded, dtype=torch.long)

    def train(self, epochs=300, batch_size=32):
        """Train the translation model."""
        # Generate training data
        input_sequences, target_sequences = self.dataset.generate_training_data()
        # Prepare input and target sequences (each is already a list of characters)
        input_chars = [list(seq) for seq in input_sequences]
        target_chars = [list(seq) for seq in target_sequences]
        # Get character sets for input and output
        input_chars_set = sorted(set(''.join([''.join(seq) for seq in input_chars])))
        output_chars_set = sorted(set(''.join([''.join(seq) for seq in target_chars])))
        print("Input characters:", input_chars_set)
        print("Output characters:", output_chars_set)
        # Training loop
        epoch_losses = []
        for epoch in range(epochs):
            total_loss = 0
            # Shuffle the data
            combined = list(zip(input_chars, target_chars))
            random.shuffle(combined)
            input_chars, target_chars = zip(*combined)
            for i in range(0, len(input_chars), batch_size):
                batch_input = input_chars[i:i + batch_size]
                batch_target = target_chars[i:i + batch_size]
                # Prepare input sequences
                input_seqs = self.pad_sequences(
                    [self.prepare_sequence(seq, self.input_char_to_idx) for seq in batch_input],
                    pad_token=0
                )
                # Prepare target sequences
                target_seqs = self.pad_sequences(
                    [self.prepare_sequence(seq, self.output_char_to_idx) for seq in batch_target],
                    pad_token=0
                )
                # Zero gradients
                self.optimizer.zero_grad()
                # Forward pass
                outputs = self.model(input_seqs, target_seqs)
                # Compute the loss
                loss = self.criterion(
                    outputs.view(-1, outputs.size(-1)),
                    target_seqs.view(-1)
                )
                # Backward pass
                loss.backward()
                self.optimizer.step()
                total_loss += loss.item()
            # Record the average epoch loss
            avg_loss = total_loss / (len(input_chars) // batch_size)
            epoch_losses.append(avg_loss)
            # Print progress
            if epoch % 10 == 0:
                print(f'Epoch {epoch}, Loss: {avg_loss:.4f}')
        # Visualize the training loss
        self.plot_training_loss(epoch_losses)
        return epoch_losses

    def translate(self, input_number):
"""Translate a single number to words."""# Prepare input sequenceinput_seq=self.prepare_sequence(list(str(input_number)), self.input_char_to_idx)
input_seq=input_seq.unsqueeze(0) # Add batch dimension# Create dummy target sequence of zerosmax_output_length=10# Maximum expected word lengthdummy_target=torch.zeros(1, max_output_length, dtype=torch.long)
# Disable gradient computationwithtorch.no_grad():
# Get model outputsoutputs=self.model(input_seq, dummy_target)
# Get the most likely output characterspredicted_indices=outputs.argmax(dim=-1)
# Convert indices back to characterspredicted_chars= []
foriinrange(predicted_indices.size(1)):
char_idx=predicted_indices[0, i].item()
char=self.output_idx_to_char[char_idx]
ifchar!='<pxad>': # Skip paddingpredicted_chars.append(char)
# Join characters to form a wordreturn''.join(predicted_chars).strip()
defsave_model(self, filepath='numeral_translator.pth'):
"""Save model state."""torch.save({
'model_state_dict': self.model.state_dict(),
'input_char_to_idx': self.input_char_to_idx,
'output_char_to_idx': self.output_char_to_idx
}, filepath)
print(f"Model saved to {filepath}")
defload_model(self, filepath='numeral_translator.pth'):
"""Load model state."""checkpoint=torch.load(filepath)
self.model.load_state_dict(checkpoint['model_state_dict'])
self.input_char_to_idx=checkpoint['input_char_to_idx']
self.output_char_to_idx=checkpoint['output_char_to_idx']
print(f"Model loaded from {filepath}")
defplot_training_loss(self, losses):
"""Visualize training loss."""plt.figure(figsize=(10, 5))
plt.plot(losses, label='Training Loss')
plt.title('Training Loss Over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.tight_layout()
plt.savefig('training_loss.png')
plt.close()
defmain():
# Define input and output character setsinput_chars=list('0123456789')
output_chars=list(' abcdefghijklmnopqrstuvwxyz') + ['<pad>']
# Initialize translatortranslator=NumeralTranslator(input_chars, output_chars)
# Train the modelprint("Training model...")
losses=translator.train(epochs=300, batch_size=32)
# Save the trained modeltranslator.save_model()
# Test the model with some examplestest_numbers= ['0', '5', '13', '25', '42', '67', '89', '99']
print("\nTesting translations:")
fornumberintest_numbers:
translation=translator.translate(number)
print(f"{number} -> {translation}")
# Interactive modeprint("\nEnter a number (0-99) to translate or 'q' to quit:")
whileTrue:
user_input=input("> ")
ifuser_input.lower() =='q':
breaktry:
number=int(user_input)
if0<=number<=99:
translation=translator.translate(user_input)
print(f"Translation: {translation}")
else:
print("Please enter a number between 0 and 99")
exceptValueError:
print("Invalid input. Please enter a valid number or 'q' to quit")
if__name__=="__main__":
main()
Transformer generator
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import math

class Vocab:
    def __init__(self, stoi, itos):
        self.stoi = stoi
        self.itos = itos

corpus = """The history of artificial intelligence (AI) began in antiquity, with myths, stories and rumors of artificial beings endowed with intelligence or consciousness by master craftsmen...."""
corpus = corpus.replace("\n", " ")
tokens = corpus.split()

from collections import Counter

token_counts = Counter(tokens)
vocab_stoi = {token: idx for idx, (token, count) in enumerate(token_counts.items())}
vocab_itos = {idx: token for token, idx in vocab_stoi.items()}
vocab = Vocab(stoi=vocab_stoi, itos=vocab_itos)

class PositionalEncoding(nn.Module):
    def __init__(self, embed_size, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.encoding = torch.zeros(max_len, embed_size)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, embed_size, 2).float() * (-math.log(10000.0) / embed_size))
        self.encoding[:, 0::2] = torch.sin(position * div_term)
        self.encoding[:, 1::2] = torch.cos(position * div_term)
        self.encoding = self.encoding.unsqueeze(0)   # shape: (1, max_len, embed_size)

    def forward(self, x):
        # x is (seq_len, batch, embed_size) because the training loop permutes to sequence-first,
        # so index the table by the sequence dimension and broadcast over the batch dimension
        return x + self.encoding[:, :x.size(0), :].transpose(0, 1).to(x.device)

class TransformerModel(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, hidden_size, num_layers, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.pos_encoder = PositionalEncoding(embed_size)
        encoder_layers = nn.TransformerEncoderLayer(embed_size, num_heads, hidden_size, dropout)
        self.transformer = nn.TransformerEncoder(encoder_layers, num_layers)
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, src, src_mask=None):
        src = self.embedding(src) * math.sqrt(src.size(-1))   # Scale by sqrt(embed_size)
        src = self.pos_encoder(src)
        output = self.transformer(src, src_mask)
        output = self.fc(output)
        return output

class TextDataset(Dataset):
    def __init__(self, text, vocab, sequence_length):
        self.vocab = vocab
        self.sequence_length = sequence_length
        self.data = self.tokenize_and_encode(text)

    def tokenize_and_encode(self, text):
        tokens = text.split()
        return [self.vocab.stoi[token] for token in tokens if token in self.vocab.stoi]

    def __len__(self):
        return len(self.data) - self.sequence_length

    def __getitem__(self, idx):
        x = self.data[idx:idx + self.sequence_length]
        y = self.data[idx + 1:idx + 1 + self.sequence_length]
        return torch.tensor(x, dtype=torch.long), torch.tensor(y, dtype=torch.long)
sequence_length = 10
batch_size = 100
dataset = TextDataset(corpus, vocab, sequence_length)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

vocab_size = len(vocab.stoi)
embed_size = 50       # Can be tuned
num_heads = 2         # Number of attention heads
hidden_size = 100     # Hidden layer size in the feedforward network
num_layers = 88       # Number of Transformer layers
dropout = 0.1
num_epochs = 100      # Adjust based on performance
learning_rate = 0.001

model = TransformerModel(vocab_size, embed_size, num_heads, hidden_size, num_layers, dropout)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

for epoch in range(num_epochs):
    for batch in train_loader:
        inputs, targets = batch
        inputs = inputs.permute(1, 0)     # (batch_size, sequence_length) -> (sequence_length, batch_size)
        targets = targets.permute(1, 0)
        outputs = model(inputs)
        # Use reshape() instead of view() because permute() makes the tensors non-contiguous
        loss = criterion(outputs.reshape(-1, vocab_size), targets.reshape(-1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}, Loss: {loss.item()}')

torch.save(model.state_dict(), 'transformer_model_ai.pth')

def generate_text(model, start_text, max_length=100):
    model.eval()
    input = torch.tensor([[vocab.stoi[start_text]]]).permute(1, 0)   # Convert start_text to an input tensor
    result = [start_text]
    for _ in range(max_length):
        output = model(input)
        prob = nn.functional.softmax(output[-1, 0], dim=0).data
        next_word = torch.multinomial(prob, 1).item()
        result.append(vocab.itos[next_word])
        input = torch.cat([input, torch.tensor([[next_word]])], dim=0)
    return ' '.join(result)

start_text = 'AI'
generated_text = generate_text(model, start_text, max_length=100)
print(generated_text)
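One detail worth noting: the training loop above never passes a src_mask, so every position can attend to later positions even though generation is autoregressive. If causal behaviour is wanted, a square subsequent mask could be built and passed to the model inside the loop; a sketch (not in the original):

# Sketch: causal mask so each token only attends to earlier positions
seq_len = inputs.size(0)                                       # inputs are (seq_len, batch)
src_mask = nn.Transformer.generate_square_subsequent_mask(seq_len)
outputs = model(inputs, src_mask)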