-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #18 from optuna/add-evo-merge-sampler
Add Evolutionary Model Merge sampler
- Loading branch information
Showing
11 changed files
with
412 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
MIT License | ||
|
||
Copyright (c) 2024 Preferred Networks, Inc. | ||
|
||
Permission is hereby granted, free of charge, to any person obtaining a copy | ||
of this software and associated documentation files (the "Software"), to deal | ||
in the Software without restriction, including without limitation the rights | ||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||
copies of the Software, and to permit persons to whom the Software is | ||
furnished to do so, subject to the following conditions: | ||
|
||
The above copyright notice and this permission notice shall be included in all | ||
copies or substantial portions of the Software. | ||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||
SOFTWARE. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
--- | ||
author: 'Optuna team' | ||
title: 'Evolutionary LLM Merge Sampler' | ||
description: 'A sampler for evolutionary LLM merge.' | ||
tags: ['sampler', 'LLM'] | ||
optuna_versions: [3.6.1] | ||
license: 'MIT License' | ||
--- | ||
|
||
## Class or Function Names | ||
- EvoMergeSampler | ||
- EvoMergeTrial | ||
|
||
## Installation | ||
```bash | ||
pip install git+https://github.com/arcee-ai/mergekit.git | ||
pip install sentencepiece accelerate protobuf bitsandbytes langchain langchain-community datasets | ||
pip install pandas cmaes | ||
export HF_TOKEN=xxx | ||
``` | ||
|
||
## Example | ||
```python | ||
sampler = EvoMergeSampler(base_config="path/to/config/yml/file") | ||
study = optuna.create_study(direction="maximize", sampler=sampler) | ||
|
||
for _ in range(100): | ||
trial = study.ask() | ||
evo_merge_trial = EvoMergeTrial(study, trial._trial_id) | ||
model = evo_merge_trial.suggest_model() | ||
|
||
acc = try_model(model) | ||
|
||
study.tell(trial, acc) | ||
|
||
print(study.trials_dataframe(attrs=("number", "value"))) | ||
``` | ||
See `example.py` for a full example. You need a GPU with 16 GB of VRAM to run this example. | ||
The following figures are obtained from the analysis of the optimization. | ||
 | ||
 | ||
|
||
## Others | ||
|
||
### Reference | ||
Akiba, T., Shing, M., Tang, Y., Sun, Q., & Ha, D. (2024). Evolutionary Optimization of Model Merging Recipes. arXiv preprint arXiv:2403.13187. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from .sampler import EvoMergeSampler | ||
from .trial import EvoMergeTrial | ||
|
||
|
||
# Names re-exported as the public API of this package.
__all__ = ["EvoMergeSampler", "EvoMergeTrial"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
models: | ||
- model: augmxnt/shisa-gamma-7b-v1 | ||
# No parameters necessary for base model | ||
- model: GAIR/Abel-7B-002 | ||
parameters: | ||
density: 0.5 | ||
weight: 0.5 | ||
- model: WizardLM/WizardMath-7B-V1.1 | ||
parameters: | ||
density: 0.5 | ||
weight: 0.5 | ||
merge_method: dare_ties | ||
base_model: augmxnt/shisa-gamma-7b-v1 | ||
parameters: | ||
int8_mask: true | ||
dtype: float16 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
from __future__ import annotations | ||
|
||
import argparse | ||
import datetime | ||
import json | ||
import os | ||
|
||
from datasets import load_dataset | ||
from langchain.chains import LLMChain | ||
from langchain.llms.base import BaseLLM | ||
from langchain.prompts import PromptTemplate | ||
import optuna | ||
|
||
from package.samplers.evo_merge.sampler import EvoMergeSampler | ||
from package.samplers.evo_merge.trial import EvoMergeTrial | ||
|
||
|
||
# EvoMergeSampler = optunahub.load_module("samplers/evo_merge").EvoMergeSampler | ||
# EvoMergeTrial = optunahub.load_module("samplers/evo_merge").EvoMergeTrial | ||
|
||
TEMPLATE = "質問に答えなさい。質問: {question} 回答: " | ||
|
||
|
||
# On the first run, fetch the test split and cache its first 100
# question/answer pairs as JSON Lines so later runs read the local file.
if not os.path.exists("./simple.jsonl"):
    dataset = load_dataset("SakanaAI/gsm8k-ja-test_250-1319", split="test")
    with open("./simple.jsonl", "w") as fout:
        fout.writelines(
            json.dumps({"question": q, "answer_number": a}) + "\n"
            for q, a in zip(dataset["question"][:100], dataset["answer_number"][:100])
        )

# Rebind ``dataset`` to the cached problems: one dict per line of the file.
with open("./simple.jsonl") as fin:
    dataset = [json.loads(line.strip()) for line in fin]
|
||
|
||
def eval_jaqket(llm_chain: LLMChain, problems: list[dict] | None = None) -> int:
    """Count the problems whose expected answer appears in the model output.

    For every problem the chain is run on its ``"question"``; only the first
    whitespace-separated token of the output is inspected, and the problem is
    counted as correct when the expected answer is a substring of that token.

    Args:
        llm_chain: Object exposing ``run(question=...)`` that returns the
            generated text.
        problems: Problems to evaluate, each a mapping with ``"question"`` and
            ``"answer_number"`` keys. Defaults to the module-level ``dataset``.

    Returns:
        The number of problems answered correctly.
    """
    if problems is None:
        problems = dataset

    correct = 0
    for problem in problems:
        out = llm_chain.run(question=problem["question"])
        tokens = out.split()
        # Keep the raw output when it contains no token at all (empty or
        # whitespace-only generation).
        candidate = tokens[0] if tokens else out
        # ``str()`` guards against answers stored as JSON numbers; a non-string
        # left operand would make the ``in`` test raise ``TypeError``.
        if str(problem["answer_number"]) in candidate:
            correct += 1

    return correct
|
||
|
||
def try_model(llm: BaseLLM) -> float:
    """Return the fraction of ``dataset`` problems that ``llm`` answers correctly.

    The model is wrapped in an ``LLMChain`` that uses the module-level
    ``TEMPLATE`` prompt and is then scored with ``eval_jaqket``.
    """
    chain = LLMChain(prompt=PromptTemplate.from_template(TEMPLATE), llm=llm)
    n_correct = eval_jaqket(chain)
    return n_correct / len(dataset)
|
||
|
||
def main() -> None:
    """Run 100 ask-and-tell trials of the evolutionary merge and print the results.

    The study name is taken from ``--study-name`` when given; otherwise a name
    based on the current time (minute resolution) is generated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--study-name", help="Optuna study name")
    args = parser.parse_args()

    study_name = args.study_name or (
        f"optuna-merge-ja-{datetime.datetime.now().isoformat(timespec='minutes')}"
    )

    study = optuna.create_study(
        direction="maximize",
        study_name=study_name,
        sampler=EvoMergeSampler(base_config="./config.yml"),
    )

    for _ in range(100):
        trial = study.ask()
        # Build the merged model for this trial, then report its accuracy.
        model = EvoMergeTrial(study, trial._trial_id).suggest_model()
        study.tell(trial, try_model(model))

    print(study.trials_dataframe(attrs=("number", "value")))


if __name__ == "__main__":
    main()
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
from __future__ import annotations | ||
|
||
import copy | ||
import tempfile | ||
from typing import Any | ||
|
||
from langchain.llms.base import BaseLLM | ||
from langchain_community.llms import HuggingFacePipeline | ||
from mergekit.config import MergeConfiguration | ||
from mergekit.merge import MergeOptions | ||
from mergekit.merge import run_merge | ||
from optuna.distributions import BaseDistribution | ||
from optuna.distributions import FloatDistribution | ||
from optuna.samplers import BaseSampler | ||
from optuna.samplers import CmaEsSampler | ||
from optuna.study import Study | ||
from optuna.trial import FrozenTrial | ||
import torch | ||
from transformers import AutoModelForCausalLM | ||
from transformers import AutoTokenizer | ||
from transformers import BitsAndBytesConfig | ||
from transformers import pipeline | ||
import yaml | ||
|
||
from package.samplers.evo_merge.trial import EvoMergeTrial | ||
|
||
|
||
class EvoMergeSampler(BaseSampler):
    """Sampler that searches model-merge parameters with CMA-ES.

    The mergekit configuration in ``base_config`` is loaded once at
    construction and used as a template. ``sample_model`` copies it, asks an
    internal ``CmaEsSampler`` for a value in ``[0, 1]`` for every parameter of
    every model except the first one, runs the merge and returns the merged
    model as a LangChain LLM.

    Args:
        base_config: Path to the mergekit YAML configuration file.
        seed: Seed forwarded to the internal ``CmaEsSampler``.
    """

    def __init__(self, base_config: str, seed: None | int = None) -> None:
        self.seed = seed
        # Forward the seed; otherwise the argument would have no effect on the
        # CMA-ES search.
        self._cmaes = CmaEsSampler(seed=seed)

        with open(base_config, "r", encoding="utf-8") as fp:
            self._merge_config = MergeConfiguration.model_validate(yaml.safe_load(fp))

    def infer_relative_search_space(
        self, study: Study, trial: FrozenTrial
    ) -> dict[str, BaseDistribution]:
        """Return an empty search space; sampling happens in ``sample_model``."""
        return {}

    def sample_relative(
        self, study: Study, trial: FrozenTrial, search_space: dict[str, BaseDistribution]
    ) -> dict[str, Any]:
        """Return no parameters; sampling happens in ``sample_model``."""
        return {}

    def sample_independent(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Return the distribution's ``_asdict()`` result instead of a sampled value.

        NOTE(review): this looks like a placeholder for the abstract method;
        confirm that it is never used to obtain real parameter values.
        """
        return param_distribution._asdict()

    def reseed_rng(self, seed: int | None = None) -> None:
        """Reseed the sampler.

        Args:
            seed: New seed. It is optional so that the method can also be
                called without arguments, as ``BaseSampler.reseed_rng`` is.
                When omitted, the internal CMA-ES sampler reseeds itself.
        """
        self.seed = seed
        if seed is None:
            self._cmaes.reseed_rng()
        else:
            # ``CmaEsSampler.reseed_rng`` takes no seed, so rebuild the sampler
            # to honor an explicit one.
            self._cmaes = CmaEsSampler(seed=seed)

    def _sample(
        self,
        study: Study,
        trial: FrozenTrial,
        param_name: str,
        param_distribution: BaseDistribution,
    ) -> Any:
        """Return the distribution's ``_asdict()`` result (see ``sample_independent``)."""
        return param_distribution._asdict()

    def sample_model(self, study: Study, trial: EvoMergeTrial) -> BaseLLM:
        """Sample merge parameters, merge the models and load the result.

        Args:
            study: Study whose storage receives the sampled parameters.
            trial: Trial the sampled parameters are recorded for.

        Returns:
            The merged model wrapped as a LangChain LLM.
        """
        merge_config = copy.deepcopy(self._merge_config)
        for i, m in enumerate(merge_config.models):
            # The first model is the base model and needs no parameters; a
            # model without parameters has nothing to sample either.
            if i == 0 or not m.parameters:
                continue

            model_name = m.model
            search_space = {f"{model_name}-{k}": FloatDistribution(0, 1) for k in m.parameters}
            params = self._cmaes.sample_relative(study, trial, search_space)

            # Record every sampled value on the trial through the storage.
            for name, param_value in params.items():
                distribution = search_space[name]
                param_value_in_internal_repr = distribution.to_internal_repr(param_value)
                study._storage.set_trial_param(
                    trial._trial_id, name, param_value_in_internal_repr, distribution
                )

            # An empty result means CMA-ES proposed nothing for this model;
            # keep the values from the base configuration.
            if len(params) == 0:
                continue
            for k in m.parameters:
                m.parameters[k] = params[f"{model_name}-{k}"]

        with tempfile.TemporaryDirectory() as output_path:
            run_merge(
                merge_config,
                out_path=output_path,
                options=MergeOptions(
                    lora_merge_cache="/tmp",
                    cuda=torch.cuda.is_available(),
                    copy_tokenizer=True,
                    lazy_unpickle=False,
                    low_cpu_memory=False,
                ),
            )
            # Load while the temporary directory still exists; it is removed
            # when the ``with`` block exits.
            llm = load_model(output_path)

        return llm
|
||
|
||
def load_model(model_id: str) -> BaseLLM:
    """Load a causal language model as a LangChain text-generation LLM.

    Args:
        model_id: Model identifier or local directory passed to the
            ``from_pretrained`` loaders.

    Returns:
        A ``HuggingFacePipeline`` that samples at most 16 new tokens with
        temperature 0.7 and returns only the generated continuation.
    """
    # Load the weights with 4-bit quantization.
    bnbconf = BitsAndBytesConfig(load_in_4bit=True)
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Fixed typo: the loader is ``from_pretrained`` (was ``frm_pretrained``,
    # which raised AttributeError on every call).
    model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=bnbconf)
    llm = HuggingFacePipeline(
        pipeline=pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            max_new_tokens=16,
            temperature=0.7,
            do_sample=True,
            return_full_text=False,
        )
    )
    return llm
Oops, something went wrong.