Skip to content

Commit

Permalink
Merge branch 'upstream' into concedo_experimental
Browse files Browse the repository at this point in the history
# Conflicts:
#	README.md
#	examples/gbnf-validator/gbnf-validator.cpp
#	examples/llava/clip.cpp
#	examples/run/README.md
#	examples/run/run.cpp
#	examples/server/README.md
#	ggml/src/ggml-cpu/CMakeLists.txt
#	src/llama.cpp
#	tests/test-grammar-integration.cpp
#	tests/test-llama-grammar.cpp
  • Loading branch information
LostRuins committed Dec 21, 2024
2 parents b7d3274 + e34c5af commit 4c56b7c
Show file tree
Hide file tree
Showing 19 changed files with 550 additions and 226 deletions.
14 changes: 13 additions & 1 deletion common/arg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params) {
params.ctx_shift = false;
}
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT"));
add_opt(common_arg(
{"--chunks"}, "N",
string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks),
Expand Down Expand Up @@ -2207,5 +2207,17 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER}));

// model-specific
add_opt(common_arg(
{"--tts-oute-default"},
string_format("use default OuteTTS models (note: can download weights from the internet)"),
[](common_params & params) {
params.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF";
params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf";
params.vocoder.hf_repo = "ggml-org/WavTokenizer";
params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf";
}
).set_examples({LLAMA_EXAMPLE_TTS}));

return ctx_arg;
}
65 changes: 63 additions & 2 deletions convert_hf_to_gguf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2200,6 +2200,15 @@ class Phi3MiniModel(Model):
model_arch = gguf.MODEL_ARCH.PHI3

def set_vocab(self):
# Phi-4 model uses GPT2Tokenizer
tokenizer_config_file = self.dir_model / 'tokenizer_config.json'
if tokenizer_config_file.is_file():
with open(tokenizer_config_file, "r", encoding="utf-8") as f:
tokenizer_config_json = json.load(f)
tokenizer_class = tokenizer_config_json['tokenizer_class']
if tokenizer_class == 'GPT2Tokenizer':
return self._set_vocab_gpt2()

from sentencepiece import SentencePieceProcessor

tokenizer_path = self.dir_model / 'tokenizer.model'
Expand Down Expand Up @@ -2316,7 +2325,11 @@ def set_gguf_parameters(self):
self.gguf_writer.add_rope_dimension_count(rope_dims)
self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
self.gguf_writer.add_file_type(self.ftype)
self.gguf_writer.add_sliding_window(self.find_hparam(["sliding_window"]))
sliding_window = self.hparams.get("sliding_window")
# use zero value of sliding_window to distinguish Phi-4 from other PHI3 models
if sliding_window is None:
sliding_window = 0
self.gguf_writer.add_sliding_window(sliding_window)

def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
n_embd = self.find_hparam(["hidden_size", "n_embd"])
Expand Down Expand Up @@ -2615,7 +2628,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
return [(self.map_tensor_name(name), data_torch)]


@Model.register("BertModel", "CamembertModel", "RobertaModel")
@Model.register("BertModel", "CamembertModel")
class BertModel(Model):
model_arch = gguf.MODEL_ARCH.BERT

Expand Down Expand Up @@ -2688,6 +2701,51 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
return [(self.map_tensor_name(name), data_torch)]


@Model.register("RobertaModel")
class RobertaModel(BertModel):
    """Converter for RoBERTa checkpoints, built on top of the BERT converter.

    Differences handled here relative to ``BertModel``:
    the position-embedding table is trimmed by ``pad_token_id + 1`` rows,
    a BPE vocabulary is used when ``tokenizer.json`` is present, and a
    leading ``roberta.`` prefix is stripped from tensor names.
    """

    model_arch = gguf.MODEL_ARCH.BERT

    def __init__(self, *args, **kwargs):
        """Initialise the base converter, then derive the position offset.

        Sets ``self._position_offset`` to ``1 + pad_token_id`` when the
        hyper-parameters define ``pad_token_id`` (otherwise ``None``), and
        reduces ``max_position_embeddings`` by the same amount if present.
        """
        super().__init__(*args, **kwargs)

        # pad_token_id tells us how many leading rows of the position
        # embedding matrix have to be dropped later on
        pad_token_id = self.hparams.get("pad_token_id")
        if pad_token_id is None:
            self._position_offset = None
            return

        self._position_offset = 1 + pad_token_id
        if "max_position_embeddings" in self.hparams:
            self.hparams["max_position_embeddings"] -= self._position_offset

    def set_vocab(self):
        """Write the vocabulary, using the BPE path when tokenizer.json exists.

        Without ``tokenizer.json`` in the model directory this defers to the
        parent implementation and returns whatever it returns.
        """
        if not (self.dir_model / "tokenizer.json").exists():
            return super().set_vocab()

        self._set_vocab_gpt2()

        writer = self.gguf_writer
        writer.add_add_bos_token(True)
        writer.add_add_eos_token(True)

        # the token type count is needed to validate the size of the
        # token_type embeddings ("Sequence A" or "Sequence B"), even though
        # all zeros are currently passed to those embeddings
        writer.add_token_type_count(self.hparams.get("type_vocab_size", 1))

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Normalise RoBERTa tensor names/shapes, then defer to the parent.

        Strips a leading ``roberta.`` from ``name`` and, for the position
        embedding weight, drops the first ``self._position_offset`` rows
        when an offset was derived in ``__init__``.
        """
        # some checkpoints prefix every tensor with "roberta."
        # e.g. https://huggingface.co/BAAI/bge-reranker-v2-m3/tree/main
        prefix = "roberta."
        if name.startswith(prefix):
            name = name[len(prefix):]

        # position embeddings start at pad_token_id + 1, so the leading rows
        # of the weight tensor are simply cut off
        is_position_embd = name == "embeddings.position_embeddings.weight"
        if is_position_embd and self._position_offset is not None:
            data_torch = data_torch[self._position_offset:, :]

        return super().modify_tensors(data_torch, name, bid)


@Model.register("NomicBertModel")
class NomicBertModel(BertModel):
model_arch = gguf.MODEL_ARCH.NOMIC_BERT
Expand Down Expand Up @@ -3007,6 +3065,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
if new_name.endswith("time_mix_w2.weight"):
data_torch = data_torch.permute(0, 2, 1)

if new_name.endswith("time_mix_decay.weight") or "lerp" in new_name:
data_torch = data_torch.squeeze()

rescale_every_n_layers = self.hparams["rescale_every"]
if rescale_every_n_layers > 0:
if new_name.endswith("time_mix_output.weight") or new_name.endswith("channel_mix_value.weight"):
Expand Down
Binary file modified examples/server/public/index.html.gz
Binary file not shown.
Loading

0 comments on commit 4c56b7c

Please sign in to comment.