Skip to content

Commit

Permalink
[pre-commit.ci] auto fixes from pre-commit.com hooks
Browse files Browse the repository at this point in the history
for more information, see https://pre-commit.ci
  • Loading branch information
pre-commit-ci[bot] committed Jul 16, 2024
1 parent ead4bb0 commit f32698f
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 29 deletions.
4 changes: 1 addition & 3 deletions applications/ColossalEval/colossal_eval/dataset/agieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,7 @@ class AGIEvalDataset(BaseDataset):
"""

@staticmethod
def load(
path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs
) -> List[Dict]:
def load(path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs) -> List[Dict]:
dataset = {"test": {}}

files = glob.glob(os.path.join(path, "*.jsonl"))
Expand Down
8 changes: 5 additions & 3 deletions applications/ColossalEval/colossal_eval/dataset/base.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
from abc import abstractstaticmethod

from torch.utils.data import Dataset
from colossal_eval.utils import jdump
from torch.utils.data import Dataset

from colossalai.logging import DistributedLogger


Expand All @@ -25,12 +26,13 @@ def save(self, save_path):
def load(path, logger: DistributedLogger, *args, **kwargs):
"""Load the original dataset and convert it into the inference dataset"""


class DistributedDataset(Dataset):
    """Minimal ``torch.utils.data.Dataset`` wrapper around an in-memory sequence.

    Used to hand a plain list of samples to a ``DataLoader`` (with a
    ``DistributedSampler``) so inference work can be sharded across ranks.
    No copying or transformation is performed; indexing is delegated
    directly to the backing sequence.
    """

    def __init__(self, data):
        # `data` is expected to be an indexable, sized sequence of samples
        # (callers in this repo pass lists of question dicts) — TODO confirm.
        self.data = data

    def __len__(self):
        # Dataset size is exactly the length of the backing sequence.
        return len(self.data)

    def __getitem__(self, idx):
        # Return the raw sample unchanged; any batching/collation is left
        # to the DataLoader's collate_fn.
        return self.data[idx]
4 changes: 1 addition & 3 deletions applications/ColossalEval/colossal_eval/dataset/ceval.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,9 +90,7 @@ class CEvalDataset(BaseDataset):
"""

@staticmethod
def load(
path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs
) -> List[Dict]:
def load(path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs) -> List[Dict]:
dataset = {"dev": {}, "test": {}}
for split in ["dev", "test"]:
files = os.listdir(os.path.join(path, split))
Expand Down
4 changes: 1 addition & 3 deletions applications/ColossalEval/colossal_eval/dataset/cmmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,7 @@ class CMMLUDataset(BaseDataset):
"""

@staticmethod
def load(
path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs
) -> List[Dict]:
def load(path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs) -> List[Dict]:
dataset = {"dev": {}, "test": {}}
for split in ["dev", "test"]:
files = os.listdir(os.path.join(path, split))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,7 @@ class GaoKaoBenchDataset(BaseDataset):
"""

@staticmethod
def load(
path: str, logger: DistributedLogger, *args, **kwargs
) -> List[Dict]:
def load(path: str, logger: DistributedLogger, *args, **kwargs) -> List[Dict]:
dataset = {"test": {}}
for category in ["Fill-in-the-blank_Questions", "Multiple-choice_Questions", "Open-ended_Questions"]:
files = os.listdir(os.path.join(path, "data", category))
Expand Down
4 changes: 1 addition & 3 deletions applications/ColossalEval/colossal_eval/dataset/mmlu.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@ class MMLUDataset(BaseDataset):
"""

@staticmethod
def load(
path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs
) -> List[Dict]:
def load(path: str, logger: DistributedLogger, few_shot: bool, *args, **kwargs) -> List[Dict]:
dataset = {"dev": {}, "test": {}}
for split in ["dev", "test"]:
files = os.listdir(os.path.join(path, split))
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import copy
import math
from typing import Any, Dict, List, Optional, Tuple

import numpy as np
import torch
from torch.utils.data import DataLoader
from colossal_eval.utils import Conversation, get_batch_prompt, is_rank_0
from peft import PeftModel
from torch.utils.data import DataLoader
from tqdm import tqdm
from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,7 @@ def dict(self):
}


def get_few_shot_prefix(
few_shot_data: List[str], tokenizer: Optional[AutoTokenizer], max_tokens: int
) -> str:
def get_few_shot_prefix(few_shot_data: List[str], tokenizer: Optional[AutoTokenizer], max_tokens: int) -> str:
"""
Get few shot prefix.
Expand Down
23 changes: 17 additions & 6 deletions applications/ColossalEval/examples/dataset_evaluation/inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
from typing import Dict, List

import torch.distributed as dist
from torch.utils.data import DataLoader, DistributedSampler
from colossal_eval.dataset.base import DistributedDataset
from colossal_eval import dataset, models, utils
from colossal_eval.dataset.base import DistributedDataset
from torch.utils.data import DataLoader, DistributedSampler

import colossalai
from colossalai.accelerator import get_accelerator
Expand Down Expand Up @@ -217,11 +217,20 @@ def main(args):
dist_dataset = DistributedDataset(category_data["data"])
else:
dist_dataset = DistributedDataset(prev_questions)

sampler = DistributedSampler(dist_dataset, num_replicas=world_size, rank=rank, shuffle=False)
questions_loader = DataLoader(dist_dataset, batch_size=batch_size, sampler=sampler, num_workers=8, pin_memory=True, collate_fn=lambda x: x)
questions_loader = DataLoader(
dist_dataset,
batch_size=batch_size,
sampler=sampler,
num_workers=8,
pin_memory=True,
collate_fn=lambda x: x,
)
answers_per_rank = model_.inference(
data_loader=questions_loader, inference_kwargs=category_data["inference_kwargs"], debug=debug_args[dataset_name]
data_loader=questions_loader,
inference_kwargs=category_data["inference_kwargs"],
debug=debug_args[dataset_name],
)
prev_questions = answers_per_rank

Expand All @@ -244,7 +253,9 @@ def main(args):
del model_
accelerator.empty_cache()

utils.jdump(dataset_cat_num_mapping, os.path.join(args.inference_save_path, model_name, "dataset_cat_num_mapping.json"))
utils.jdump(
dataset_cat_num_mapping, os.path.join(args.inference_save_path, model_name, "dataset_cat_num_mapping.json")
)

dist.barrier()
if rank == 0:
Expand Down

0 comments on commit f32698f

Please sign in to comment.