diff --git a/.vscode/launch.json b/.vscode/launch.json
index 3dfbdda..380375f 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -8,7 +8,7 @@
             "name": "Auto - Debbug kimchima",
             "type": "python",
             "request": "launch",
-            "program": "${workspaceFolder}/kimchima.py",
+            "program": "${workspaceFolder}/src/kimchima/cmds/kimchima_cli.py",
             "console": "integratedTerminal",
             "purpose": [
                 "debug-in-terminal"
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 184c4eb..d73198f 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -2,7 +2,7 @@
     "python.testing.unittestArgs": [
         "-v",
         "-s",
-        ".",
+        "./src",
        "-p",
         "test_*.py"
     ],
diff --git a/MANIFEST.in b/MANIFEST.in
deleted file mode 100644
index b175f76..0000000
--- a/MANIFEST.in
+++ /dev/null
@@ -1,6 +0,0 @@
-exclude kimchi.yml
-exclude Makefile
-exclude .vscode
-exclude .gitignore
-exclude LICENSE
-recursive-exclude params *
diff --git a/Makefile b/Makefile
index 0136e33..66466cd 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,7 @@
+#################################################################
+TESTDIR:=src/kimchima/tests/
+
+##############################Legacy#############################
 .PHONY: setup
 setup:
 	python setup.py sdist bdist_wheel
@@ -10,6 +14,7 @@ upload:
 
 ################################Poetry################################
 .PHONY: poetry
 poetry:
+	@poetry config virtualenvs.in-project true
 	@pipx install poetry==1.8.2
 
@@ -25,7 +30,7 @@ install:
 
 .PHONY: test
 test:
-	@poetry run python -m unittest discover -v
+	@poetry run python -m unittest discover ${TESTDIR} -v
 
 
 # build and publish
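The new `make test` target above is equivalent to programmatic unittest discovery. A minimal sketch, not part of the patch, assuming the `src/kimchima/tests/` path that `TESTDIR` points at:

```python
# Sketch: programmatic equivalent of the new `make test` target,
# using the TESTDIR path defined in the Makefile above.
import unittest

if __name__ == "__main__":
    # Discover test_*.py files under the new src-layout test directory.
    suite = unittest.defaultTestLoader.discover("src/kimchima/tests/")
    unittest.TextTestRunner(verbosity=2).run(suite)
```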
diff --git a/README.md b/README.md
index 1c64423..d11cf69 100644
--- a/README.md
+++ b/README.md
@@ -10,13 +10,6 @@ The collections of tools for ML model development.
 
 You can use it as a command line tool if you like. And you can also use it as a library. Or you can run it in VSCode with [`launch.json`](.vscode/launch.json).
 
-## Command Line Tool
-
-```bash
-$ python -m kimchima auto sentence-transformers/all-MiniLM-L6-v2 Melbourne
-
-```
-
 # Acknowledgement
 
diff --git a/cmds/__init__.py b/cmds/__init__.py
deleted file mode 100644
index 23f8d4a..0000000
--- a/cmds/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from cmds.auto import CommandAuto
\ No newline at end of file
diff --git a/examples/examples.py b/examples/examples.py
new file mode 100644
index 0000000..b3c0077
--- /dev/null
+++ b/examples/examples.py
@@ -0,0 +1,20 @@
+from kimchima import Auto, get_device, get_capability
+
+model = Auto(model_name_or_path="sentence-transformers/all-MiniLM-L6-v2")
+
+# Compute embeddings for a single text
+embeddings = model.get_embeddings(text="Melbourne")
+print(embeddings.shape)
+
+# Compute embeddings for multiple texts
+embeddings = model.get_embeddings(text=["Melbourne", "Sydney"])
+print(embeddings.shape)
+
+# Check which device is available: CUDA GPU, Apple Silicon (mps), or CPU
+device = get_device()
+print(device)
+
+
+# Get the compute capability of the GPU (NVIDIA only)
+capability = get_capability()
+print(capability)
\ No newline at end of file
diff --git a/pkg/__init__.py b/pkg/__init__.py
deleted file mode 100644
index 8d23440..0000000
--- a/pkg/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-from pkg.devices import *
-from pkg.auto import *
diff --git a/pkg/auto/__init__.py b/pkg/auto/__init__.py
deleted file mode 100644
index f26097b..0000000
--- a/pkg/auto/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from pkg.auto.auto import Auto
diff --git a/pkg/devices/__init__.py b/pkg/devices/__init__.py
deleted file mode 100644
index 3691d75..0000000
--- a/pkg/devices/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from pkg.devices.devices import *
\ No newline at end of file
diff --git a/pkg/dump/__init__.py b/pkg/dump/__init__.py
deleted file mode 100644
index 1b88290..0000000
--- a/pkg/dump/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from pkg.dump.dump import Dump
\ No newline at end of file
- """ - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_tensor(linear.weight.t(), 'weight', path) # PyTorch and Tinygrad strangely transpose linear weights so reverse that - if linear.bias is not None: - Dump.save_tensor(linear.bias, 'bias', path) - - @staticmethod - def save_rmsnorm(norm, path): - """ - Saves the weight and epsilon value of a RMSNorm layer to numpy files. - - Args: - - norm: The RMSNorm layer to be saved. - - path: The path where the files will be saved. - """ - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_tensor(norm.weight, 'weight', path) - Dump.save_scalar(norm.eps, 'eps', path) - - @staticmethod - def save_attention(attention, path): - """ - Saves the weight of the query, key, value and output linear layers of an attention layer to numpy files. - - Args: - - attention: The attention layer to be saved. - - path: The path where the files will be saved. - """ - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_linear(attention.wq, pathlib.Path(path, 'wq')) - Dump.save_linear(attention.wk, pathlib.Path(path, 'wk')) - Dump.save_linear(attention.wv, pathlib.Path(path, 'wv')) - Dump.save_linear(attention.wo, pathlib.Path(path, 'wo')) - n_kv_head = attention.n_kv_heads - n_head = n_kv_head * attention.n_rep - Dump.save_scalar(n_head, "n_head", path) - Dump.save_scalar(n_kv_head, "n_kv_head", path) - - @staticmethod - def save_feedforward(feed_forward, path): - """ - Saves the weight of the three linear layers of a feedforward layer to numpy files. - - Args: - - feed_forward: The feedforward layer to be saved. - - path: The path where the files will be saved. - """ - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_linear(feed_forward.w1, pathlib.Path(path, 'w1')) - Dump.save_linear(feed_forward.w2, pathlib.Path(path, 'w2')) - Dump.save_linear(feed_forward.w3, pathlib.Path(path, 'w3')) - - @staticmethod - def save_embedding(embedding, path): - """ - Saves the weight of an embedding layer to a numpy file. - - Args: - - embedding: The embedding layer to be saved. - - path: The path where the file will be saved. - """ - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_tensor(embedding.weight, 'weight', path) - - @staticmethod - def save_transformer_block(transformer_block, path): - """ - Saves the components of a transformer block to numpy files. - - Args: - - transformer_block: The transformer block to be saved. - - path: The path where the files will be saved. - """ - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_attention(transformer_block.attention, pathlib.Path(path, 'attention')) - Dump.save_feedforward(transformer_block.feed_forward, pathlib.Path(path, 'feedforward')) - Dump.save_rmsnorm(transformer_block.attention_norm, pathlib.Path(path, 'attention_norm')) - Dump.save_rmsnorm(transformer_block.ffn_norm, pathlib.Path(path, 'ffn_norm')) - - @staticmethod - def save_transformer(transformer, path): - """ - Saves the components of a transformer model to numpy files. - - Args: - - transformer: The transformer model to be saved. - - path: The path where the files will be saved. 
- """ - with torch.no_grad(): - pathlib.Path(path).mkdir(parents=True, exist_ok=True) - Dump.save_scalar(len(transformer.layers), 'n_layer', path) - for idx, layer in enumerate(transformer.layers): - Dump.save_transformer_block(layer, pathlib.Path(path, f'layer{idx}')) - Dump.save_rmsnorm(transformer.norm, pathlib.Path(path, 'norm')) - Dump.save_embedding(transformer.tok_embeddings, pathlib.Path(path, 'tok_embeddings')) - Dump.save_linear(transformer.output, pathlib.Path(path, 'output')) - Dump.save_scalar(10000.0, 'theta', path) - Dump.save_scalar(transformer.params.max_seq_len, 'n_ctx', path) - Dump.save_scalar(transformer.params.multiple_of, 'multiple_of', path) - if transformer.params.ffn_dim_multiplier is not None: - Dump.save_scalar(transformer.params.ffn_dim_multiplier, 'ffn_dim_multiplier', path) diff --git a/pkg/model/__init__.py b/pkg/model/__init__.py deleted file mode 100644 index 5770728..0000000 --- a/pkg/model/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from pkg.model.model import * \ No newline at end of file diff --git a/pkg/model/model.py b/pkg/model/model.py deleted file mode 100644 index fa61b32..0000000 --- a/pkg/model/model.py +++ /dev/null @@ -1,453 +0,0 @@ -# This file is adapted from the LLama project: -# https://github.com/facebookresearch/llama/blob/main/llama/model.py - -# Original LLama code by Facebook AI Research -# Adapted by Aisuko - -import math -from dataclasses import dataclass -from typing import Optional, Tuple - -import torch -import torch.nn.functional as F -import torch.nn as nn -from torch import nn -from torch.nn import Embedding, Linear - - -@dataclass -class ModelArgs: - """ - A class to store the arguments for the model. - - Attributes: - ----------- - dim : int - The dimension of the model. - n_layers : int - The number of layers in the model. - n_heads : int - The number of heads in the model. - n_kv_heads : Optional[int] - The number of key-value heads in the model. - vocab_size : int - The size of the vocabulary. - multiple_of : int - The multiple of the SwiGLU hidden layer size. - ffn_dim_multiplier : Optional[float] - The multiplier for the feedforward network dimension. - norm_eps : float - The epsilon value for normalization. - - max_batch_size : int - The maximum batch size for the model. - max_seq_len : int - The maximum sequence length for the model. - """ - dim: int = 4096 - n_layers: int = 32 - n_heads: int = 32 - n_kv_heads: Optional[int] = None - vocab_size: int = -1 # defined later by tokenizer - multiple_of: int = 256 # make SwiGLU hidden layer size multiple of large power of 2 - ffn_dim_multiplier: Optional[float] = None - norm_eps: float = 1e-5 - - max_batch_size: int = 32 - max_seq_len: int = 2048 - - -class RMSNorm(torch.nn.Module): - """ - Root Mean Square Normalization (RMSNorm) layer. - - Args: - dim (int): The dimension of the input tensor. - eps (float): A small value added to the denominator for numerical stability. Default: 1e-6. - """ - def __init__(self, dim: int, eps: float = 1e-6): - super().__init__() - self.eps = eps - self.weight = nn.Parameter(torch.ones(dim)) - - def _norm(self, x): - return x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps) - - def forward(self, x): - output = self._norm(x.float()).type_as(x) - return output * self.weight - - -def precompute_freqs_cis(dim: int, end: int, theta: float = 10000.0): - """ - Precomputes the frequency of cosine and sine functions for a given dimension and end value. - - Args: - dim (int): The dimension of the frequency tensor. 
- end (int): The end value of the frequency tensor. - theta (float, optional): The scaling factor for the frequency tensor. Defaults to 10000.0. - - Returns: - freqs_cis (torch.Tensor): A complex tensor of shape (end, dim/2) containing the precomputed frequency values. - """ - freqs = 1.0 / (theta ** (torch.arange(0, dim, 2)[: (dim // 2)].float() / dim)) - t = torch.arange(end, device=freqs.device) # type: ignore - freqs = torch.outer(t, freqs).float() # type: ignore - freqs_cis = torch.polar(torch.ones_like(freqs), freqs) # complex64 - return freqs_cis - - -def reshape_for_broadcast(freqs_cis: torch.Tensor, x: torch.Tensor): - """ - Reshapes the input tensor `freqs_cis` to match the shape of `x` for broadcasting. - - Args: - freqs_cis (torch.Tensor): A tensor of shape (x.shape[1], x.shape[-1]). - x (torch.Tensor): The input tensor. - - Returns: - torch.Tensor: The reshaped tensor of the same shape as `x` except for the second and second-to-last dimensions. - """ - ndim = x.ndim - assert 0 <= 1 < ndim - assert freqs_cis.shape == (x.shape[1], x.shape[-1]) - shape = [d if i == 1 or i == ndim - 1 else 1 for i, d in enumerate(x.shape)] - return freqs_cis.view(*shape) - - -def apply_rotary_emb( - xq: torch.Tensor, - xk: torch.Tensor, - freqs_cis: torch.Tensor, -) -> Tuple[torch.Tensor, torch.Tensor]: - """ - Applies rotary embeddings to the input tensors xq and xk using the given frequency tensor freqs_cis. - - Args: - xq (torch.Tensor): The query tensor. - xk (torch.Tensor): The key tensor. - freqs_cis (torch.Tensor): The frequency tensor. - - Returns: - Tuple[torch.Tensor, torch.Tensor]: A tuple of the output tensors xq_out and xk_out. - """ - xq_ = torch.view_as_complex(xq.float().reshape(*xq.shape[:-1], -1, 2)) - xk_ = torch.view_as_complex(xk.float().reshape(*xk.shape[:-1], -1, 2)) - freqs_cis = reshape_for_broadcast(freqs_cis, xq_) - xq_out = torch.view_as_real(xq_ * freqs_cis).flatten(3) - xk_out = torch.view_as_real(xk_ * freqs_cis).flatten(3) - return xq_out.type_as(xq), xk_out.type_as(xk) - - -def repeat_kv(x: torch.Tensor, n_rep: int) -> torch.Tensor: - """torch.repeat_interleave(x, dim=2, repeats=n_rep)""" - bs, slen, n_kv_heads, head_dim = x.shape - if n_rep == 1: - return x - return ( - x[:, :, :, None, :] - .expand(bs, slen, n_kv_heads, n_rep, head_dim) - .reshape(bs, slen, n_kv_heads * n_rep, head_dim) - ) - - -class Attention(nn.Module): - def __init__(self, args: ModelArgs): - """ - Initializes the Attention module. 
- - Args: - - args: ModelArgs object containing the following attributes: - - n_heads: int, number of attention heads - - n_kv_heads: int, number of key-value attention heads - - dim: int, dimension of the model - - max_batch_size: int, maximum batch size - - max_seq_len: int, maximum sequence length - """ - super().__init__() - self.n_kv_heads = args.n_heads if args.n_kv_heads is None else args.n_kv_heads - model_parallel_size = 1#fs_init.get_model_parallel_world_size() - self.n_local_heads = args.n_heads // model_parallel_size - self.n_local_kv_heads = self.n_kv_heads // model_parallel_size - self.n_rep = self.n_local_heads // self.n_local_kv_heads - self.head_dim = args.dim // args.n_heads - - self.wq = nn.Linear( - args.dim, - args.n_heads * self.head_dim, - bias=False, - ) - self.wk = nn.Linear( - args.dim, - self.n_kv_heads * self.head_dim, - bias=False, - ) - self.wv = nn.Linear( - args.dim, - self.n_kv_heads * self.head_dim, - bias=False, - ) - self.wo = nn.Linear( - args.n_heads * self.head_dim, - args.dim, - bias=False, - ) - - self.cache_k = torch.zeros( - ( - args.max_batch_size, - args.max_seq_len, - self.n_local_kv_heads, - self.head_dim, - ) - ) - self.cache_v = torch.zeros( - ( - args.max_batch_size, - args.max_seq_len, - self.n_local_kv_heads, - self.head_dim, - ) - ) - - def forward( - self, - x: torch.Tensor, - start_pos: int, - freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor], - ) -> torch.Tensor: - """ - Performs a forward pass of the Attention module. - - Args: - - x: torch.Tensor of shape (batch_size, sequence_length, model_dimension), input tensor - - start_pos: int, starting position of the sequence - - freqs_cis: torch.Tensor of shape (sequence_length, model_dimension), cosine and sine frequencies for rotary embeddings - - mask: Optional[torch.Tensor] of shape (batch_size, sequence_length, sequence_length), mask tensor - - Returns: - - output: torch.Tensor of shape (batch_size, sequence_length, model_dimension), output tensor - """ - bsz, seqlen, _ = x.shape - xq, xk, xv = self.wq(x), self.wk(x), self.wv(x) - - xq = xq.view(bsz, seqlen, self.n_local_heads, self.head_dim) - xk = xk.view(bsz, seqlen, self.n_local_kv_heads, self.head_dim) - xv = xv.view(bsz, seqlen, self.n_local_kv_heads, self.head_dim) - - xq, xk = apply_rotary_emb(xq, xk, freqs_cis=freqs_cis) - - self.cache_k = self.cache_k.to(xq) - self.cache_v = self.cache_v.to(xq) - - self.cache_k[:bsz, start_pos : start_pos + seqlen] = xk - self.cache_v[:bsz, start_pos : start_pos + seqlen] = xv - - keys = self.cache_k[:bsz, : start_pos + seqlen] - values = self.cache_v[:bsz, : start_pos + seqlen] - - # repeat k/v heads if n_kv_heads < n_heads - keys = repeat_kv(keys, self.n_rep) # (bs, seqlen, n_local_heads, head_dim) - values = repeat_kv(values, self.n_rep) # (bs, seqlen, n_local_heads, head_dim) - - xq = xq.transpose(1, 2) # (bs, n_local_heads, seqlen, head_dim) - keys = keys.transpose(1, 2) - values = values.transpose(1, 2) - scores = torch.matmul(xq, keys.transpose(2, 3)) / math.sqrt(self.head_dim) - if mask is not None: - scores = scores + mask # (bs, n_local_heads, seqlen, cache_len + seqlen) - scores = F.softmax(scores.float(), dim=-1).type_as(xq) - output = torch.matmul(scores, values) # (bs, n_local_heads, seqlen, head_dim) - output = output.transpose(1, 2).contiguous().view(bsz, seqlen, -1) - return self.wo(output) - - -class FeedForward(nn.Module): - """ - A feedforward neural network module. - - Args: - dim (int): The input dimension. - hidden_dim (int): The hidden dimension. 
- multiple_of (int): The output dimension is a multiple of this value. - ffn_dim_multiplier (Optional[float]): A multiplier for the hidden dimension. - - Attributes: - w1 (nn.Linear): The first linear layer. - w2 (nn.Linear): The second linear layer. - w3 (nn.Linear): The third linear layer. - """ - - def __init__( - self, - dim: int, - hidden_dim: int, - multiple_of: int, - ffn_dim_multiplier: Optional[float], - ): - super().__init__() - hidden_dim = int(2 * hidden_dim / 3) - # custom dim factor multiplier - if ffn_dim_multiplier is not None: - hidden_dim = int(ffn_dim_multiplier * hidden_dim) - hidden_dim = multiple_of * ((hidden_dim + multiple_of - 1) // multiple_of) - - self.w1 = Linear( - dim, hidden_dim, bias=False - ) - self.w2 = Linear( - hidden_dim, dim, bias=False - ) - self.w3 = Linear( - dim, hidden_dim, bias=False - ) - - def forward(self, x): - """ - Forward pass of the feedforward neural network. - - Args: - x (torch.Tensor): The input tensor. - - Returns: - torch.Tensor: The output tensor. - """ - return self.w2(F.silu(self.w1(x)) * self.w3(x)) - - -class TransformerBlock(nn.Module): - """ - A transformer block that consists of a self-attention layer and a feedforward layer. - - Args: - layer_id (int): The ID of the layer. - args (ModelArgs): The arguments for the model. - - Attributes: - n_heads (int): The number of attention heads. - dim (int): The dimension of the model. - head_dim (int): The dimension of each attention head. - attention (Attention): The self-attention layer. - feed_forward (FeedForward): The feedforward layer. - layer_id (int): The ID of the layer. - attention_norm (RMSNorm): The normalization layer for the attention output. - ffn_norm (RMSNorm): The normalization layer for the feedforward output. - """ - - def __init__(self, layer_id: int, args: ModelArgs): - super().__init__() - self.n_heads = args.n_heads - self.dim = args.dim - self.head_dim = args.dim // args.n_heads - self.attention = Attention(args) - self.feed_forward = FeedForward( - dim=args.dim, - hidden_dim=4 * args.dim, - multiple_of=args.multiple_of, - ffn_dim_multiplier=args.ffn_dim_multiplier, - ) - self.layer_id = layer_id - self.attention_norm = RMSNorm(args.dim, eps=args.norm_eps) - self.ffn_norm = RMSNorm(args.dim, eps=args.norm_eps) - - def forward( - self, - x: torch.Tensor, - start_pos: int, - freqs_cis: torch.Tensor, - mask: Optional[torch.Tensor], - ) -> torch.Tensor: - """ - Forward pass of the transformer block. - - Args: - x (torch.Tensor): The input tensor. - start_pos (int): The starting position for the input sequence. - freqs_cis (torch.Tensor): The frequency tensor for the input sequence. - mask (Optional[torch.Tensor]): The mask tensor for the input sequence. - - Returns: - torch.Tensor: The output tensor. - """ - h = x + self.attention.forward( - self.attention_norm(x), start_pos, freqs_cis, mask - ) - out = h + self.feed_forward.forward(self.ffn_norm(h)) - return out - - -class Transformer(nn.Module): - """ - A Transformer model for sequence-to-sequence tasks. - - Args: - params (ModelArgs): A dataclass containing the model hyperparameters. - - Attributes: - params (ModelArgs): The model hyperparameters. - vocab_size (int): The size of the vocabulary. - n_layers (int): The number of layers in the Transformer. - tok_embeddings (Embedding): The token embeddings layer. - layers (ModuleList): The list of TransformerBlock layers. - norm (RMSNorm): The normalization layer. - output (Linear): The output layer. - freqs_cis (torch.Tensor): The precomputed cosine frequencies. 
- - Methods: - forward(tokens, start_pos): The forward pass of the model. - """ - - def __init__(self, params: ModelArgs): - super().__init__() - self.params = params - self.vocab_size = params.vocab_size - self.n_layers = params.n_layers - - self.tok_embeddings = Embedding( - params.vocab_size, params.dim - ) - - self.layers = torch.nn.ModuleList() - for layer_id in range(params.n_layers): - self.layers.append(TransformerBlock(layer_id, params)) - - self.norm = RMSNorm(params.dim, eps=params.norm_eps) - self.output = Linear( - params.dim, params.vocab_size, bias=False - ) - - self.freqs_cis = precompute_freqs_cis( - self.params.dim // self.params.n_heads, self.params.max_seq_len * 2 - ) - - @torch.inference_mode() - def forward(self, tokens: torch.Tensor, start_pos: int): - """ - The forward pass of the model. - - Args: - tokens (torch.Tensor): The input sequence of tokens. - start_pos (int): The starting position of the sequence. - - Returns: - output (torch.Tensor): The output sequence of logits. - """ - _bsz, seqlen = tokens.shape - h = self.tok_embeddings(tokens) - self.freqs_cis = self.freqs_cis.to(h.device) - freqs_cis = self.freqs_cis[start_pos : start_pos + seqlen] - - mask = None - if seqlen > 1: - mask = torch.full( - (1, 1, seqlen, seqlen), float("-inf"), device=tokens.device - ) - mask = torch.triu(mask, diagonal=start_pos + 1).type_as(h) - - for layer in self.layers: - h = layer(h, start_pos, freqs_cis, mask) - h = self.norm(h) - output = self.output(h).float() - return output - \ No newline at end of file diff --git a/pkg/tokenizer/__init__.py b/pkg/tokenizer/__init__.py deleted file mode 100644 index b1eb644..0000000 --- a/pkg/tokenizer/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from pkg.tokenizer.tokenizer import Tokenizer \ No newline at end of file diff --git a/pkg/tokenizer/tokenizer.py b/pkg/tokenizer/tokenizer.py deleted file mode 100644 index 77f3ce7..0000000 --- a/pkg/tokenizer/tokenizer.py +++ /dev/null @@ -1,85 +0,0 @@ -# This file is adapted from the LLama project: -# https://github.com/facebookresearch/llama/blob/main/llama/tokenizer.py - -# Original LLama code by Facebook AI Research -# Adapted by Aisuko - -from typing import List -import sentencepiece as spm - -class Tokenizer: - """ - A class for encoding and decoding text using SentencePiece tokenizer. - - Attributes: - ----------- - sp_model : sentencepiece.SentencePieceProcessor - The SentencePiece model used for encoding and decoding. - n_words : int - The size of the vocabulary. - bos_id : int - The ID of the beginning-of-sentence token. - eos_id : int - The ID of the end-of-sentence token. - pad_id : int - The ID of the padding token. - """ - - def __init__(self, model_path: str): - """ - Initializes a Tokenizer object. - - Parameters: - ----------- - model_path : str - The path to the SentencePiece model file. - """ - self.sp_model = spm.SentencePieceProcessor(model_file=model_path) - - # BOS / EOS token IDs - self.n_words: int = self.sp_model.vocab_size() - self.bos_id: int = self.sp_model.bos_id() - self.eos_id: int = self.sp_model.eos_id() - self.pad_id: int = self.sp_model.pad_id() - - def encode(self, s: str, bos: bool, eos: bool) -> List[int]: - """ - Encodes a string using the SentencePiece model. - - Parameters: - ----------- - s : str - The string to be encoded. - bos : bool - Whether to add a beginning-of-sentence token to the beginning of the encoded sequence. - eos : bool - Whether to add an end-of-sentence token to the end of the encoded sequence. 
diff --git a/pyproject.toml b/pyproject.toml
index 67a0a14..e747053 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "kimchima"
-version = "0.2.1"
-description = ""
+version = "0.2.2"
+description = "The collections of tools for ML model development."
 authors = ["Aisuko "]
 license = "Apache-2.0"
 readme = "README.md"
@@ -9,11 +9,7 @@ homepage = "https://github.com/Aisuko/kimchi"
 repository = "https://github.com/Aisuko/kimchi"
 keywords = ["ai", "llm"]
 
-packages = [
-    { include = "pkg" },
-]
-exclude = ["pkg/dump", "pkg/model", "pkg/tokenizer"]
 
 [tool.poetry.dependencies]
 python = "^3.11"
diff --git a/src/kimchima/__init__.py b/src/kimchima/__init__.py
new file mode 100644
index 0000000..20915a2
--- /dev/null
+++ b/src/kimchima/__init__.py
@@ -0,0 +1,22 @@
+# coding=utf-8
+# Copyright [2024] [Aisuko]
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__="0.2.2"
+
+from .pkg import (
+    Auto,
+    Devices,
+    get_device,
+    get_capability
+)
\ No newline at end of file
diff --git a/src/kimchima/cmds/__init__.py b/src/kimchima/cmds/__init__.py
new file mode 100644
index 0000000..6e45d93
--- /dev/null
+++ b/src/kimchima/cmds/__init__.py
@@ -0,0 +1 @@
+from .auto_cli import CommandAuto
\ No newline at end of file
diff --git a/cmds/auto.py b/src/kimchima/cmds/auto_cli.py
similarity index 98%
rename from cmds/auto.py
rename to src/kimchima/cmds/auto_cli.py
index 91ab21d..43d35b8 100644
--- a/cmds/auto.py
+++ b/src/kimchima/cmds/auto_cli.py
@@ -19,7 +19,7 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from pkg import Auto
+from kimchima.pkg import Auto
 
 
 class CommandAuto:
diff --git a/kimchima.py b/src/kimchima/cmds/kimchima_cli.py
similarity index 96%
rename from kimchima.py
rename to src/kimchima/cmds/kimchima_cli.py
index 0b27aaf..3145842 100644
--- a/kimchima.py
+++ b/src/kimchima/cmds/kimchima_cli.py
@@ -14,7 +14,7 @@
 
 import argparse
 
-from cmds.auto import CommandAuto
+from kimchima.cmds.auto_cli import CommandAuto
 
 
 def main():
diff --git a/src/kimchima/pkg/__init__.py b/src/kimchima/pkg/__init__.py
new file mode 100644
index 0000000..d031dff
--- /dev/null
+++ b/src/kimchima/pkg/__init__.py
@@ -0,0 +1,6 @@
+from .auto import Auto
+from .devices import (
+    Devices,
+    get_device,
+    get_capability
+)
\ No newline at end of file
diff --git a/pkg/auto/auto.py b/src/kimchima/pkg/auto.py
similarity index 100%
rename from pkg/auto/auto.py
rename to src/kimchima/pkg/auto.py
diff --git a/pkg/devices/devices.py b/src/kimchima/pkg/devices.py
similarity index 65%
rename from pkg/devices/devices.py
rename to src/kimchima/pkg/devices.py
index 0688da5..d82e5b7 100644
--- a/pkg/devices/devices.py
+++ b/src/kimchima/pkg/devices.py
@@ -12,10 +12,15 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+from __future__ import annotations
+
 from enum import Enum
 import torch
 import platform
 
+from typing import Tuple
+
+
 class Devices(Enum):
     Silicon = 'mps'
     CPU = 'cpu'
@@ -23,7 +28,7 @@ class Devices(Enum):
     GPU = 'cuda'
 
 
-def get_device():
+def get_device() -> Devices:
     """
     Only support Single GPU for now
     """
@@ -32,3 +37,17 @@
     elif torch.cuda.is_available():
         return Devices.GPU
     return Devices.CPU
+
+
+def get_capability() -> Tuple[int, int]:
+    """
+    Get the compute capability of the current GPU; useful for checking
+    support for recent quantization techniques such as Marlin.
+
+    Returns:
+        tuple: The compute capability of the GPU in the current environment.
+        For non-GPU environments, returns (0, 0).
+    """
+    if get_device() == Devices.GPU:
+        return torch.cuda.get_device_capability()
+    return (0, 0)
\ No newline at end of file
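The `get_capability` helper added above exists to gate capability-dependent kernels. A minimal sketch of that call pattern, assuming (this is not encoded anywhere in the patch) that Marlin-style kernels require compute capability 8.0 or newer:

```python
# Sketch of gating a capability-dependent kernel on get_capability().
# Assumption: Marlin-style kernels need compute capability >= 8.0 (Ampere);
# verify against the kernel's own documentation before relying on this.
from kimchima import get_capability

major, minor = get_capability()  # (0, 0) on non-GPU environments
if (major, minor) >= (8, 0):
    print(f"compute capability {major}.{minor}: Marlin-style kernels usable")
else:
    print(f"compute capability {major}.{minor}: falling back to standard kernels")
```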
diff --git a/tests/__init__.py b/src/kimchima/tests/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to src/kimchima/tests/__init__.py
diff --git a/tests/test_auto.py b/src/kimchima/tests/test_auto.py
similarity index 87%
rename from tests/test_auto.py
rename to src/kimchima/tests/test_auto.py
index ee69585..bae9d04 100644
--- a/tests/test_auto.py
+++ b/src/kimchima/tests/test_auto.py
@@ -14,7 +14,7 @@
 
 import unittest
 
-from pkg.auto import Auto
+from kimchima.pkg import Auto
 
 
 class TestAuto(unittest.TestCase):
@@ -22,13 +22,21 @@ class TestAuto(unittest.TestCase):
     model_name = 'sentence-transformers/all-MiniLM-L6-v2'
 
     def test_get_embeddings(self):
+        """
+        Test the get_embeddings method with a single text
+        """
         model = Auto(model_name_or_path=self.model_name)
         embeddings = model.get_embeddings(text='Melbourne')
         self.assertIsNotNone(embeddings)
         self.assertEqual(embeddings.shape, (1, 384))
+
 
     def test_get_embeddings_with_list(self):
+        """
+        Test the get_embeddings method with a list of texts
+        """
         model = Auto(model_name_or_path=self.model_name)
         embeddings = model.get_embeddings(text=['Melbourne', 'Sydney'])
         self.assertIsNotNone(embeddings)
         self.assertEqual(embeddings.shape, (2, 384))
+
diff --git a/tests/test_devices.py b/src/kimchima/tests/test_devices.py
similarity index 68%
rename from tests/test_devices.py
rename to src/kimchima/tests/test_devices.py
index 42032c5..000eb5a 100644
--- a/tests/test_devices.py
+++ b/src/kimchima/tests/test_devices.py
@@ -16,7 +16,7 @@
 import platform
 import torch
 
-from pkg.devices import Devices, get_device
+from kimchima.pkg import Devices, get_device, get_capability
 
 
 class TestDevices(unittest.TestCase):
@@ -34,3 +34,16 @@ def test_get_device(self):
 
         if platform.system() != 'Darwin' and not torch.cuda.is_available():
             self.assertEqual(get_device(), Devices.CPU)
+
+    def test_get_capability(self):
+
+        # Test if the device is a GPU
+        if get_device() == Devices.GPU:
+            self.assertIsInstance(get_capability(), tuple)
+            self.assertEqual(len(get_capability()), 2)
+            self.assertIsInstance(get_capability()[0], int)
+            self.assertIsInstance(get_capability()[1], int)
+
+        # Test if the device is not a GPU
+        if get_device() != Devices.GPU:
+            self.assertEqual(get_capability(), (0, 0))
diff --git a/tests/test_dump.py b/tests/test_dump.py
deleted file mode 100644
index 7b022c6..0000000
--- a/tests/test_dump.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import unittest
-import torch
-from pkg.model import Transformer, ModelArgs
-
-from pkg.dump import Dump
-
-
-class TestDump(unittest.TestCase):
-
-    @unittest.skip("Pass testing for no useful features")
-    def test_dump(self):
-        n_vocab = 10
-        n_state = 8
-        multiple_of = 3
-        n_head = 4
-        n_kv_head = 2
-        n_layer = 3
-        norm_eps = 1e-6
-        max_batch_size = 1
-
-
-        model_args = ModelArgs(
-            dim=n_state,
-            n_layers=n_layer,
-            n_heads=n_head,
-            n_kv_heads=n_kv_head,
-            vocab_size=n_vocab,
-            multiple_of=multiple_of,
-            norm_eps=norm_eps,
-            max_batch_size=max_batch_size,
-        )
-
-        llama = Transformer(model_args)
-
-        with torch.no_grad():
-            tokens=torch.tensor([0,2,1], dtype=torch.int32).unsqueeze(0)
-            output=llama(tokens,0)
-            self.assertIsNotNone(tokens.numpy())
-            self.assertIsNotNone(output.numpy())
-
-        Dump.save_transformer(llama, 'params')
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
deleted file mode 100644
index d55c7e2..0000000
--- a/tests/test_tokenizer.py
+++ /dev/null
@@ -1,26 +0,0 @@
-import unittest
-import os
-
-from pkg.tokenizer import tokenizer
-
-class TestTokenizer(unittest.TestCase):
-
-    test_prompt="This is a test prompt."
-
-    @unittest.skip("Pass testing for no useful features")
-    def test_tokenize(self):
-        # get home path from environment variable
-        home = os.environ['HOME']
-
-        tok=tokenizer.Tokenizer(home+"/Downloads/workspace/llama/tokenizer.model")
-
-        # tok should not be none
-        self.assertIsNotNone(tok)
-
-        encoded = tok.encode(self.test_prompt, True, True)
-        self.assertIsNotNone(encoded)
-
-        decoded = tok.decode(encoded)
-
-        self.assertEqual(decoded, self.test_prompt)
-
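The body of the renamed `kimchima_cli.py` is mostly elided by the rename diff above. A hypothetical reconstruction of its argparse wiring, inferred only from the `python -m kimchima auto <model> <text>` usage removed from README.md; the real argument names and the `CommandAuto` interface are not shown in this diff:

```python
# Hypothetical sketch of the CLI wiring in kimchima_cli.py; the real argument
# names and the CommandAuto dispatch are not visible in this diff.
import argparse

from kimchima.cmds.auto_cli import CommandAuto


def main():
    parser = argparse.ArgumentParser(prog="kimchima")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # `auto <model_name_or_path> <text>` mirrors the usage removed from README.md
    auto = subparsers.add_parser("auto", help="compute embeddings with an auto model")
    auto.add_argument("model_name_or_path")
    auto.add_argument("text")

    args = parser.parse_args()
    # Dispatch to CommandAuto here; its interface is not shown in this diff.


if __name__ == "__main__":
    main()
```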