From 4c2cdd75c271ca885a9c457173f6d24e648db7e4 Mon Sep 17 00:00:00 2001 From: Lee I-Shiang Date: Thu, 11 Jul 2024 23:47:45 +0800 Subject: [PATCH 1/2] feat(dataset): add presets --- walledeval/data/core.py | 24 +++++++++---------- walledeval/data/presets/AART.yaml | 3 +++ walledeval/data/presets/AdvBench.yaml | 3 +++ walledeval/data/presets/AdvancedAIRisk.yaml | 5 ++++ walledeval/data/presets/AegisSafetyTest.yaml | 3 +++ walledeval/data/presets/AyaRedTeaming.yaml | 11 +++++++++ walledeval/data/presets/BBQ.yaml | 14 +++++++++++ walledeval/data/presets/BeaverTailsEval.yaml | 3 +++ walledeval/data/presets/CBBQ.yaml | 17 +++++++++++++ walledeval/data/presets/CDNA.yaml | 3 +++ walledeval/data/presets/CPAD.yaml | 3 +++ walledeval/data/presets/CatHarmfulQA.yaml | 6 +++++ walledeval/data/presets/CyberSecEval.yaml | 11 +++++++++ walledeval/data/presets/DELPHI.yaml | 3 +++ walledeval/data/presets/DNA.yaml | 3 +++ walledeval/data/presets/DT-OOD.yaml | 3 +++ walledeval/data/presets/DTStereotype.yaml | 3 +++ walledeval/data/presets/DTToxicity.yaml | 6 +++++ .../data/presets/ForbiddenQuestions.yaml | 3 +++ walledeval/data/presets/HarmBench.yaml | 3 +++ walledeval/data/presets/JailbreakBench.yaml | 3 +++ walledeval/data/presets/JailbreakHub.yaml | 3 +++ .../data/presets/MaliciousInstruct.yaml | 3 +++ walledeval/data/presets/RTP.yaml | 3 +++ .../data/presets/SGSafetyQuestions.yaml | 3 +++ walledeval/data/presets/SafeText.yaml | 3 +++ walledeval/data/presets/SafetyDatasets.yaml | 3 +++ walledeval/data/presets/SaladBench.yaml | 3 +++ .../data/presets/SimpleSafetyTests.yaml | 5 ++++ walledeval/data/presets/StrongREJECT.yaml | 3 +++ walledeval/data/presets/TDC23-RedTeaming.yaml | 3 +++ walledeval/data/presets/TET.yaml | 3 +++ walledeval/data/presets/WMDP.yaml | 6 +++++ walledeval/data/presets/WildGuardTest.yaml | 3 +++ walledeval/data/presets/WildJailbreak.yaml | 3 +++ walledeval/data/presets/XSTest.yaml | 3 +++ 36 files changed, 171 insertions(+), 12 deletions(-) create mode 100644 
walledeval/data/presets/AART.yaml create mode 100644 walledeval/data/presets/AdvBench.yaml create mode 100644 walledeval/data/presets/AdvancedAIRisk.yaml create mode 100644 walledeval/data/presets/AegisSafetyTest.yaml create mode 100644 walledeval/data/presets/AyaRedTeaming.yaml create mode 100644 walledeval/data/presets/BBQ.yaml create mode 100644 walledeval/data/presets/BeaverTailsEval.yaml create mode 100644 walledeval/data/presets/CBBQ.yaml create mode 100644 walledeval/data/presets/CDNA.yaml create mode 100644 walledeval/data/presets/CPAD.yaml create mode 100644 walledeval/data/presets/CatHarmfulQA.yaml create mode 100644 walledeval/data/presets/CyberSecEval.yaml create mode 100644 walledeval/data/presets/DELPHI.yaml create mode 100644 walledeval/data/presets/DNA.yaml create mode 100644 walledeval/data/presets/DT-OOD.yaml create mode 100644 walledeval/data/presets/DTStereotype.yaml create mode 100644 walledeval/data/presets/DTToxicity.yaml create mode 100644 walledeval/data/presets/ForbiddenQuestions.yaml create mode 100644 walledeval/data/presets/HarmBench.yaml create mode 100644 walledeval/data/presets/JailbreakBench.yaml create mode 100644 walledeval/data/presets/JailbreakHub.yaml create mode 100644 walledeval/data/presets/MaliciousInstruct.yaml create mode 100644 walledeval/data/presets/RTP.yaml create mode 100644 walledeval/data/presets/SGSafetyQuestions.yaml create mode 100644 walledeval/data/presets/SafeText.yaml create mode 100644 walledeval/data/presets/SafetyDatasets.yaml create mode 100644 walledeval/data/presets/SaladBench.yaml create mode 100644 walledeval/data/presets/SimpleSafetyTests.yaml create mode 100644 walledeval/data/presets/StrongREJECT.yaml create mode 100644 walledeval/data/presets/TDC23-RedTeaming.yaml create mode 100644 walledeval/data/presets/TET.yaml create mode 100644 walledeval/data/presets/WMDP.yaml create mode 100644 walledeval/data/presets/WildGuardTest.yaml create mode 100644 walledeval/data/presets/WildJailbreak.yaml create 
mode 100644 walledeval/data/presets/XSTest.yaml diff --git a/walledeval/data/core.py b/walledeval/data/core.py index 0013589b..5a12385d 100644 --- a/walledeval/data/core.py +++ b/walledeval/data/core.py @@ -8,7 +8,7 @@ import datasets #Dataset from walledeval.types import ( - MultipleChoiceQuestion, MultipleResponseQuestion, + MultipleChoiceQuestion, MultipleResponseQuestion, OpenEndedQuestion, Prompt, AutocompletePrompt, @@ -57,7 +57,7 @@ def from_hub(cls, name: str, **ds_kwargs): dataset = load_dataset(name, config, split=split, **ds_kwargs) return cls( - name + ("/" + config if config else "") + "/" + split, + name + ("/" + config if config else "") + "/" + split, dataset ) @@ -79,13 +79,13 @@ def sample(self, samples: Optional[int] = None) -> list[T]: class _HuggingFaceDatasetAlias: def __init__(self, model: type = Prompt): self.model = model - + def __call__(self, name: str, dataset: datasets.Dataset): return HuggingFaceDataset(name, dataset, self.model) - - def from_hub(self, - name: str, - config: Optional[str] = None, + + def from_hub(self, + name: str, + config: Optional[str] = None, split: str = "train", **ds_kwargs): return HuggingFaceDataset.from_hub( @@ -97,7 +97,7 @@ class HuggingFaceDataset(_HuggingFaceDataset): def __init__(self, name: str, dataset: datasets.Dataset, model: type = Prompt): _HuggingFaceDataset.__init__(self, name, dataset) self.model = model - + @classmethod def from_hub(cls, name: str, config: Optional[str] = None, @@ -106,11 +106,11 @@ def from_hub(cls, name: str, **ds_kwargs): dataset = load_dataset(name, config, split=split, **ds_kwargs) return cls( - name + ("/" + config if config else "") + "/" + split, + name + ("/" + config if config else "") + "/" + split, dataset, model ) - + def __class_getitem__(cls, model: type = Prompt): # Refer to https://stackoverflow.com/questions/73464414/why-are-generics-in-python-implemented-using-class-getitem-instead-of-geti # for why it is implemented like this @@ -173,7 +173,7 @@ def 
convert(self, sample: dict) -> SystemAssistedPrompt: ) -class JudgeQuestioningDataset(_HuggingFaceDataset[JudgeQuestioningPrompt]): +class JudgeQuestioningDataset(_HuggingFaceDataset[JudgeQuestioningPrompt]): def convert(self, sample: dict) -> JudgeQuestioningPrompt: return JudgeQuestioningPrompt( prompt=sample["prompt"], @@ -186,4 +186,4 @@ def convert(self, sample: dict) -> InjectionPrompt: return SystemAssistedPrompt( prompt=sample["prompt"], system=sample["system"] - ) \ No newline at end of file + ) diff --git a/walledeval/data/presets/AART.yaml b/walledeval/data/presets/AART.yaml new file mode 100644 index 00000000..3f9ce759 --- /dev/null +++ b/walledeval/data/presets/AART.yaml @@ -0,0 +1,3 @@ +name: AART +split: train +type: Prompt diff --git a/walledeval/data/presets/AdvBench.yaml b/walledeval/data/presets/AdvBench.yaml new file mode 100644 index 00000000..c0408fd5 --- /dev/null +++ b/walledeval/data/presets/AdvBench.yaml @@ -0,0 +1,3 @@ +name: AdvBench +split: train +type: Prompt diff --git a/walledeval/data/presets/AdvancedAIRisk.yaml b/walledeval/data/presets/AdvancedAIRisk.yaml new file mode 100644 index 00000000..c39188af --- /dev/null +++ b/walledeval/data/presets/AdvancedAIRisk.yaml @@ -0,0 +1,5 @@ +name: AdvancedAIRisk +split: + - lm + - human +type: MultipleChoiceQuestion diff --git a/walledeval/data/presets/AegisSafetyTest.yaml b/walledeval/data/presets/AegisSafetyTest.yaml new file mode 100644 index 00000000..c5eb0178 --- /dev/null +++ b/walledeval/data/presets/AegisSafetyTest.yaml @@ -0,0 +1,3 @@ +name: AegisSafetyTest +split: train +type: Prompt diff --git a/walledeval/data/presets/AyaRedTeaming.yaml b/walledeval/data/presets/AyaRedTeaming.yaml new file mode 100644 index 00000000..60ce837f --- /dev/null +++ b/walledeval/data/presets/AyaRedTeaming.yaml @@ -0,0 +1,11 @@ +name: AyaRedTeaming +split: + - arabic + - english + - filipino + - french + - hindi + - russian + - serbian + - spanish +type: Prompt diff --git 
a/walledeval/data/presets/BBQ.yaml b/walledeval/data/presets/BBQ.yaml new file mode 100644 index 00000000..cf1ae585 --- /dev/null +++ b/walledeval/data/presets/BBQ.yaml @@ -0,0 +1,14 @@ +name: BBQ +split: + - age + - disabilityStatus + - genderIdentity + - nationality + - physicalAppearance + - raceEthnicity + - raceXSes + - raceXGender + - religion + - ses + - sexualOrientation +type: MultipleChoiceQuestion diff --git a/walledeval/data/presets/BeaverTailsEval.yaml b/walledeval/data/presets/BeaverTailsEval.yaml new file mode 100644 index 00000000..dae15239 --- /dev/null +++ b/walledeval/data/presets/BeaverTailsEval.yaml @@ -0,0 +1,3 @@ +name: BeaverTailsEval +split: train +type: Prompt diff --git a/walledeval/data/presets/CBBQ.yaml b/walledeval/data/presets/CBBQ.yaml new file mode 100644 index 00000000..c564074c --- /dev/null +++ b/walledeval/data/presets/CBBQ.yaml @@ -0,0 +1,17 @@ +name: CBBQ +split: + - ses + - age + - disability + - disease + - educationalQualification + - ethnicity + - gender + - householdRegistration + - nationality + - physicalAppearance + - race + - region + - religion + - sexualOrientation +type: MultipleChoiceQuestion diff --git a/walledeval/data/presets/CDNA.yaml b/walledeval/data/presets/CDNA.yaml new file mode 100644 index 00000000..a037fa3f --- /dev/null +++ b/walledeval/data/presets/CDNA.yaml @@ -0,0 +1,3 @@ +name: CDNA +split: train +type: Prompt diff --git a/walledeval/data/presets/CPAD.yaml b/walledeval/data/presets/CPAD.yaml new file mode 100644 index 00000000..5f914447 --- /dev/null +++ b/walledeval/data/presets/CPAD.yaml @@ -0,0 +1,3 @@ +name: CPAD +split: train +type: Prompt diff --git a/walledeval/data/presets/CatHarmfulQA.yaml b/walledeval/data/presets/CatHarmfulQA.yaml new file mode 100644 index 00000000..45d5c665 --- /dev/null +++ b/walledeval/data/presets/CatHarmfulQA.yaml @@ -0,0 +1,6 @@ +name: CatHarmfulQA +split: + - en + - zh + - vi +type: Prompt diff --git a/walledeval/data/presets/CyberSecEval.yaml 
b/walledeval/data/presets/CyberSecEval.yaml new file mode 100644 index 00000000..9b5cb1b8 --- /dev/null +++ b/walledeval/data/presets/CyberSecEval.yaml @@ -0,0 +1,11 @@ +name: CyberSecEval +split: + - python + - php + - javascript + - rust + - java + - cpp + - c + - csharp +type: Prompt diff --git a/walledeval/data/presets/DELPHI.yaml b/walledeval/data/presets/DELPHI.yaml new file mode 100644 index 00000000..767dd59d --- /dev/null +++ b/walledeval/data/presets/DELPHI.yaml @@ -0,0 +1,3 @@ +name: DELPHI +split: train +type: Prompt diff --git a/walledeval/data/presets/DNA.yaml b/walledeval/data/presets/DNA.yaml new file mode 100644 index 00000000..53312a50 --- /dev/null +++ b/walledeval/data/presets/DNA.yaml @@ -0,0 +1,3 @@ +name: DNA +split: train +type: Prompt diff --git a/walledeval/data/presets/DT-OOD.yaml b/walledeval/data/presets/DT-OOD.yaml new file mode 100644 index 00000000..11322719 --- /dev/null +++ b/walledeval/data/presets/DT-OOD.yaml @@ -0,0 +1,3 @@ +name: DT-OOD +split: train +type: MultipleChoiceQuestion diff --git a/walledeval/data/presets/DTStereotype.yaml b/walledeval/data/presets/DTStereotype.yaml new file mode 100644 index 00000000..60b2a638 --- /dev/null +++ b/walledeval/data/presets/DTStereotype.yaml @@ -0,0 +1,3 @@ +name: DTStereotype +split: train +type: Prompt diff --git a/walledeval/data/presets/DTToxicity.yaml b/walledeval/data/presets/DTToxicity.yaml new file mode 100644 index 00000000..101e3ea2 --- /dev/null +++ b/walledeval/data/presets/DTToxicity.yaml @@ -0,0 +1,6 @@ +name: DTToxicity +split: + - rtp + - gpt3.5 + - gpt4 +type: Prompt diff --git a/walledeval/data/presets/ForbiddenQuestions.yaml b/walledeval/data/presets/ForbiddenQuestions.yaml new file mode 100644 index 00000000..4167d822 --- /dev/null +++ b/walledeval/data/presets/ForbiddenQuestions.yaml @@ -0,0 +1,3 @@ +name: ForbiddenQuestions +split: train +type: Prompt diff --git a/walledeval/data/presets/HarmBench.yaml b/walledeval/data/presets/HarmBench.yaml new file mode 100644 
index 00000000..e2e3a939 --- /dev/null +++ b/walledeval/data/presets/HarmBench.yaml @@ -0,0 +1,3 @@ +name: HarmBench +split: train +type: Prompt diff --git a/walledeval/data/presets/JailbreakBench.yaml b/walledeval/data/presets/JailbreakBench.yaml new file mode 100644 index 00000000..e44aae23 --- /dev/null +++ b/walledeval/data/presets/JailbreakBench.yaml @@ -0,0 +1,3 @@ +name: JailbreakBench +split: train +type: Prompt diff --git a/walledeval/data/presets/JailbreakHub.yaml b/walledeval/data/presets/JailbreakHub.yaml new file mode 100644 index 00000000..5a818db9 --- /dev/null +++ b/walledeval/data/presets/JailbreakHub.yaml @@ -0,0 +1,3 @@ +name: JailbreakHub +split: train +type: Prompt diff --git a/walledeval/data/presets/MaliciousInstruct.yaml b/walledeval/data/presets/MaliciousInstruct.yaml new file mode 100644 index 00000000..64418c65 --- /dev/null +++ b/walledeval/data/presets/MaliciousInstruct.yaml @@ -0,0 +1,3 @@ +name: MaliciousInstruct +split: train +type: Prompt diff --git a/walledeval/data/presets/RTP.yaml b/walledeval/data/presets/RTP.yaml new file mode 100644 index 00000000..6d76cb71 --- /dev/null +++ b/walledeval/data/presets/RTP.yaml @@ -0,0 +1,3 @@ +name: RTP +split: train +type: Prompt diff --git a/walledeval/data/presets/SGSafetyQuestions.yaml b/walledeval/data/presets/SGSafetyQuestions.yaml new file mode 100644 index 00000000..24db3aeb --- /dev/null +++ b/walledeval/data/presets/SGSafetyQuestions.yaml @@ -0,0 +1,3 @@ +name: SGSafetyQuestions +split: train +type: Prompt diff --git a/walledeval/data/presets/SafeText.yaml b/walledeval/data/presets/SafeText.yaml new file mode 100644 index 00000000..ec82ac53 --- /dev/null +++ b/walledeval/data/presets/SafeText.yaml @@ -0,0 +1,3 @@ +name: SafeText +split: train +type: AutocompletePrompt diff --git a/walledeval/data/presets/SafetyDatasets.yaml b/walledeval/data/presets/SafetyDatasets.yaml new file mode 100644 index 00000000..072e4a20 --- /dev/null +++ b/walledeval/data/presets/SafetyDatasets.yaml @@ -0,0 
+1,3 @@ +name: SafetyDatasets +split: train +type: Prompt diff --git a/walledeval/data/presets/SaladBench.yaml b/walledeval/data/presets/SaladBench.yaml new file mode 100644 index 00000000..315aed30 --- /dev/null +++ b/walledeval/data/presets/SaladBench.yaml @@ -0,0 +1,3 @@ +name: SaladBench +split: train +type: MultipleResponseQuestion diff --git a/walledeval/data/presets/SimpleSafetyTests.yaml b/walledeval/data/presets/SimpleSafetyTests.yaml new file mode 100644 index 00000000..f76b1caf --- /dev/null +++ b/walledeval/data/presets/SimpleSafetyTests.yaml @@ -0,0 +1,5 @@ +name: SimpleSafetyTests +split: + - info + - instruct +type: Prompt diff --git a/walledeval/data/presets/StrongREJECT.yaml b/walledeval/data/presets/StrongREJECT.yaml new file mode 100644 index 00000000..22fe7716 --- /dev/null +++ b/walledeval/data/presets/StrongREJECT.yaml @@ -0,0 +1,3 @@ +name: StrongREJECT +split: train +type: Prompt diff --git a/walledeval/data/presets/TDC23-RedTeaming.yaml b/walledeval/data/presets/TDC23-RedTeaming.yaml new file mode 100644 index 00000000..cd516727 --- /dev/null +++ b/walledeval/data/presets/TDC23-RedTeaming.yaml @@ -0,0 +1,3 @@ +name: TDC23-RedTeaming +split: train +type: Prompt diff --git a/walledeval/data/presets/TET.yaml b/walledeval/data/presets/TET.yaml new file mode 100644 index 00000000..e70a2e40 --- /dev/null +++ b/walledeval/data/presets/TET.yaml @@ -0,0 +1,3 @@ +name: TET +split: train +type: Prompt diff --git a/walledeval/data/presets/WMDP.yaml b/walledeval/data/presets/WMDP.yaml new file mode 100644 index 00000000..00c96eb8 --- /dev/null +++ b/walledeval/data/presets/WMDP.yaml @@ -0,0 +1,6 @@ +name: WMDP +split: + - bio + - chem + - cyber +type: MultipleChoiceQuestion diff --git a/walledeval/data/presets/WildGuardTest.yaml b/walledeval/data/presets/WildGuardTest.yaml new file mode 100644 index 00000000..eb0c394b --- /dev/null +++ b/walledeval/data/presets/WildGuardTest.yaml @@ -0,0 +1,3 @@ +name: WildGuardTest +split: train +type: Prompt diff 
--git a/walledeval/data/presets/WildJailbreak.yaml b/walledeval/data/presets/WildJailbreak.yaml new file mode 100644 index 00000000..37a68ade --- /dev/null +++ b/walledeval/data/presets/WildJailbreak.yaml @@ -0,0 +1,3 @@ +name: WildJailbreak +split: train +type: Prompt diff --git a/walledeval/data/presets/XSTest.yaml b/walledeval/data/presets/XSTest.yaml new file mode 100644 index 00000000..2e579b82 --- /dev/null +++ b/walledeval/data/presets/XSTest.yaml @@ -0,0 +1,3 @@ +name: XSTest +split: test +type: Prompt From f65cbd0cfdd86ee2611ef9d8fcad3afebc65334e Mon Sep 17 00:00:00 2001 From: Lee I-Shiang Date: Thu, 11 Jul 2024 23:56:53 +0800 Subject: [PATCH 2/2] feat(dataset): refactor preset names --- walledeval/data/presets/AegisSafetyTest.yaml | 3 --- walledeval/data/presets/BeaverTailsEval.yaml | 3 --- walledeval/data/presets/DTStereotype.yaml | 3 --- walledeval/data/presets/ForbiddenQuestions.yaml | 3 --- walledeval/data/presets/JailbreakBench.yaml | 3 --- walledeval/data/presets/JailbreakHub.yaml | 3 --- walledeval/data/presets/MaliciousInstruct.yaml | 3 --- walledeval/data/presets/SGSafetyQuestions.yaml | 3 --- walledeval/data/presets/SafetyDatasets.yaml | 3 --- walledeval/data/presets/StrongREJECT.yaml | 3 --- walledeval/data/presets/TDC23-RedTeaming.yaml | 3 --- walledeval/data/presets/WildGuardTest.yaml | 3 --- walledeval/data/presets/WildJailbreak.yaml | 3 --- walledeval/data/presets/{ => walledai}/AART.yaml | 2 +- walledeval/data/presets/{ => walledai}/AdvBench.yaml | 2 +- walledeval/data/presets/{ => walledai}/AdvancedAIRisk.yaml | 0 walledeval/data/presets/walledai/AegisSafetyTest.yaml | 3 +++ walledeval/data/presets/{ => walledai}/AyaRedTeaming.yaml | 2 +- walledeval/data/presets/{ => walledai}/BBQ.yaml | 2 +- walledeval/data/presets/walledai/BeaverTailsEval.yaml | 3 +++ walledeval/data/presets/{ => walledai}/CBBQ.yaml | 2 +- walledeval/data/presets/{ => walledai}/CDNA.yaml | 2 +- walledeval/data/presets/{ => walledai}/CPAD.yaml | 2 +- 
walledeval/data/presets/{ => walledai}/CatHarmfulQA.yaml | 2 +- walledeval/data/presets/{ => walledai}/CyberSecEval.yaml | 2 +- walledeval/data/presets/{ => walledai}/DELPHI.yaml | 2 +- walledeval/data/presets/{ => walledai}/DNA.yaml | 2 +- walledeval/data/presets/{ => walledai}/DT-OOD.yaml | 2 +- walledeval/data/presets/walledai/DTStereotype.yaml | 3 +++ walledeval/data/presets/{ => walledai}/DTToxicity.yaml | 2 +- walledeval/data/presets/walledai/ForbiddenQuestions.yaml | 3 +++ walledeval/data/presets/{ => walledai}/HarmBench.yaml | 2 +- walledeval/data/presets/walledai/JailbreakBench.yaml | 3 +++ walledeval/data/presets/walledai/JailbreakHub.yaml | 3 +++ walledeval/data/presets/walledai/MaliciousInstruct.yaml | 3 +++ walledeval/data/presets/{ => walledai}/RTP.yaml | 2 +- walledeval/data/presets/walledai/SGSafetyQuestions.yaml | 3 +++ walledeval/data/presets/{ => walledai}/SafeText.yaml | 2 +- walledeval/data/presets/walledai/SafetyDatasets.yaml | 3 +++ walledeval/data/presets/{ => walledai}/SaladBench.yaml | 2 +- walledeval/data/presets/{ => walledai}/SimpleSafetyTests.yaml | 2 +- walledeval/data/presets/walledai/StrongREJECT.yaml | 3 +++ walledeval/data/presets/walledai/TDC23-RedTeaming.yaml | 3 +++ walledeval/data/presets/{ => walledai}/TET.yaml | 2 +- walledeval/data/presets/{ => walledai}/WMDP.yaml | 2 +- walledeval/data/presets/walledai/WildGuardTest.yaml | 3 +++ walledeval/data/presets/walledai/WildJailbreak.yaml | 3 +++ walledeval/data/presets/{ => walledai}/XSTest.yaml | 2 +- 48 files changed, 60 insertions(+), 60 deletions(-) delete mode 100644 walledeval/data/presets/AegisSafetyTest.yaml delete mode 100644 walledeval/data/presets/BeaverTailsEval.yaml delete mode 100644 walledeval/data/presets/DTStereotype.yaml delete mode 100644 walledeval/data/presets/ForbiddenQuestions.yaml delete mode 100644 walledeval/data/presets/JailbreakBench.yaml delete mode 100644 walledeval/data/presets/JailbreakHub.yaml delete mode 100644 
walledeval/data/presets/MaliciousInstruct.yaml delete mode 100644 walledeval/data/presets/SGSafetyQuestions.yaml delete mode 100644 walledeval/data/presets/SafetyDatasets.yaml delete mode 100644 walledeval/data/presets/StrongREJECT.yaml delete mode 100644 walledeval/data/presets/TDC23-RedTeaming.yaml delete mode 100644 walledeval/data/presets/WildGuardTest.yaml delete mode 100644 walledeval/data/presets/WildJailbreak.yaml rename walledeval/data/presets/{ => walledai}/AART.yaml (56%) rename walledeval/data/presets/{ => walledai}/AdvBench.yaml (52%) rename walledeval/data/presets/{ => walledai}/AdvancedAIRisk.yaml (100%) create mode 100644 walledeval/data/presets/walledai/AegisSafetyTest.yaml rename walledeval/data/presets/{ => walledai}/AyaRedTeaming.yaml (79%) rename walledeval/data/presets/{ => walledai}/BBQ.yaml (91%) create mode 100644 walledeval/data/presets/walledai/BeaverTailsEval.yaml rename walledeval/data/presets/{ => walledai}/CBBQ.yaml (92%) rename walledeval/data/presets/{ => walledai}/CDNA.yaml (56%) rename walledeval/data/presets/{ => walledai}/CPAD.yaml (56%) rename walledeval/data/presets/{ => walledai}/CatHarmfulQA.yaml (59%) rename walledeval/data/presets/{ => walledai}/CyberSecEval.yaml (77%) rename walledeval/data/presets/{ => walledai}/DELPHI.yaml (54%) rename walledeval/data/presets/{ => walledai}/DNA.yaml (57%) rename walledeval/data/presets/{ => walledai}/DT-OOD.yaml (65%) create mode 100644 walledeval/data/presets/walledai/DTStereotype.yaml rename walledeval/data/presets/{ => walledai}/DTToxicity.yaml (64%) create mode 100644 walledeval/data/presets/walledai/ForbiddenQuestions.yaml rename walledeval/data/presets/{ => walledai}/HarmBench.yaml (50%) create mode 100644 walledeval/data/presets/walledai/JailbreakBench.yaml create mode 100644 walledeval/data/presets/walledai/JailbreakHub.yaml create mode 100644 walledeval/data/presets/walledai/MaliciousInstruct.yaml rename walledeval/data/presets/{ => walledai}/RTP.yaml (57%) create mode 100644 
walledeval/data/presets/walledai/SGSafetyQuestions.yaml rename walledeval/data/presets/{ => walledai}/SafeText.yaml (61%) create mode 100644 walledeval/data/presets/walledai/SafetyDatasets.yaml rename walledeval/data/presets/{ => walledai}/SaladBench.yaml (62%) rename walledeval/data/presets/{ => walledai}/SimpleSafetyTests.yaml (56%) create mode 100644 walledeval/data/presets/walledai/StrongREJECT.yaml create mode 100644 walledeval/data/presets/walledai/TDC23-RedTeaming.yaml rename walledeval/data/presets/{ => walledai}/TET.yaml (57%) rename walledeval/data/presets/{ => walledai}/WMDP.yaml (75%) create mode 100644 walledeval/data/presets/walledai/WildGuardTest.yaml create mode 100644 walledeval/data/presets/walledai/WildJailbreak.yaml rename walledeval/data/presets/{ => walledai}/XSTest.yaml (53%) diff --git a/walledeval/data/presets/AegisSafetyTest.yaml b/walledeval/data/presets/AegisSafetyTest.yaml deleted file mode 100644 index c5eb0178..00000000 --- a/walledeval/data/presets/AegisSafetyTest.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: AegisSafetyTest -split: train -type: Prompt diff --git a/walledeval/data/presets/BeaverTailsEval.yaml b/walledeval/data/presets/BeaverTailsEval.yaml deleted file mode 100644 index dae15239..00000000 --- a/walledeval/data/presets/BeaverTailsEval.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: BeaverTailsEval -split: train -type: Prompt diff --git a/walledeval/data/presets/DTStereotype.yaml b/walledeval/data/presets/DTStereotype.yaml deleted file mode 100644 index 60b2a638..00000000 --- a/walledeval/data/presets/DTStereotype.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: DTStereotype -split: train -type: Prompt diff --git a/walledeval/data/presets/ForbiddenQuestions.yaml b/walledeval/data/presets/ForbiddenQuestions.yaml deleted file mode 100644 index 4167d822..00000000 --- a/walledeval/data/presets/ForbiddenQuestions.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: ForbiddenQuestions -split: train -type: Prompt diff --git 
a/walledeval/data/presets/JailbreakBench.yaml b/walledeval/data/presets/JailbreakBench.yaml deleted file mode 100644 index e44aae23..00000000 --- a/walledeval/data/presets/JailbreakBench.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: JailbreakBench -split: train -type: Prompt diff --git a/walledeval/data/presets/JailbreakHub.yaml b/walledeval/data/presets/JailbreakHub.yaml deleted file mode 100644 index 5a818db9..00000000 --- a/walledeval/data/presets/JailbreakHub.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: JailbreakHub -split: train -type: Prompt diff --git a/walledeval/data/presets/MaliciousInstruct.yaml b/walledeval/data/presets/MaliciousInstruct.yaml deleted file mode 100644 index 64418c65..00000000 --- a/walledeval/data/presets/MaliciousInstruct.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: MaliciousInstruct -split: train -type: Prompt diff --git a/walledeval/data/presets/SGSafetyQuestions.yaml b/walledeval/data/presets/SGSafetyQuestions.yaml deleted file mode 100644 index 24db3aeb..00000000 --- a/walledeval/data/presets/SGSafetyQuestions.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: SGSafetyQuestions -split: train -type: Prompt diff --git a/walledeval/data/presets/SafetyDatasets.yaml b/walledeval/data/presets/SafetyDatasets.yaml deleted file mode 100644 index 072e4a20..00000000 --- a/walledeval/data/presets/SafetyDatasets.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: SafetyDatasets -split: train -type: Prompt diff --git a/walledeval/data/presets/StrongREJECT.yaml b/walledeval/data/presets/StrongREJECT.yaml deleted file mode 100644 index 22fe7716..00000000 --- a/walledeval/data/presets/StrongREJECT.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: StrongREJECT -split: train -type: Prompt diff --git a/walledeval/data/presets/TDC23-RedTeaming.yaml b/walledeval/data/presets/TDC23-RedTeaming.yaml deleted file mode 100644 index cd516727..00000000 --- a/walledeval/data/presets/TDC23-RedTeaming.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: TDC23-RedTeaming -split: train -type: Prompt diff --git 
a/walledeval/data/presets/WildGuardTest.yaml b/walledeval/data/presets/WildGuardTest.yaml deleted file mode 100644 index eb0c394b..00000000 --- a/walledeval/data/presets/WildGuardTest.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: WildGuardTest -split: train -type: Prompt diff --git a/walledeval/data/presets/WildJailbreak.yaml b/walledeval/data/presets/WildJailbreak.yaml deleted file mode 100644 index 37a68ade..00000000 --- a/walledeval/data/presets/WildJailbreak.yaml +++ /dev/null @@ -1,3 +0,0 @@ -name: WildJailbreak -split: train -type: Prompt diff --git a/walledeval/data/presets/AART.yaml b/walledeval/data/presets/walledai/AART.yaml similarity index 56% rename from walledeval/data/presets/AART.yaml rename to walledeval/data/presets/walledai/AART.yaml index 3f9ce759..063d54f6 100644 --- a/walledeval/data/presets/AART.yaml +++ b/walledeval/data/presets/walledai/AART.yaml @@ -1,3 +1,3 @@ -name: AART +name: walledai/AART split: train type: Prompt diff --git a/walledeval/data/presets/AdvBench.yaml b/walledeval/data/presets/walledai/AdvBench.yaml similarity index 52% rename from walledeval/data/presets/AdvBench.yaml rename to walledeval/data/presets/walledai/AdvBench.yaml index c0408fd5..a94cf553 100644 --- a/walledeval/data/presets/AdvBench.yaml +++ b/walledeval/data/presets/walledai/AdvBench.yaml @@ -1,3 +1,3 @@ -name: AdvBench +name: walledai/AdvBench split: train type: Prompt diff --git a/walledeval/data/presets/AdvancedAIRisk.yaml b/walledeval/data/presets/walledai/AdvancedAIRisk.yaml similarity index 100% rename from walledeval/data/presets/AdvancedAIRisk.yaml rename to walledeval/data/presets/walledai/AdvancedAIRisk.yaml diff --git a/walledeval/data/presets/walledai/AegisSafetyTest.yaml b/walledeval/data/presets/walledai/AegisSafetyTest.yaml new file mode 100644 index 00000000..3b86d302 --- /dev/null +++ b/walledeval/data/presets/walledai/AegisSafetyTest.yaml @@ -0,0 +1,3 @@ +name: walledai/AegisSafetyTest +split: train +type: Prompt diff --git 
a/walledeval/data/presets/AyaRedTeaming.yaml b/walledeval/data/presets/walledai/AyaRedTeaming.yaml similarity index 79% rename from walledeval/data/presets/AyaRedTeaming.yaml rename to walledeval/data/presets/walledai/AyaRedTeaming.yaml index 60ce837f..64a78010 100644 --- a/walledeval/data/presets/AyaRedTeaming.yaml +++ b/walledeval/data/presets/walledai/AyaRedTeaming.yaml @@ -1,4 +1,4 @@ -name: AyaRedTeaming +name: walledai/AyaRedTeaming split: - arabic - english diff --git a/walledeval/data/presets/BBQ.yaml b/walledeval/data/presets/walledai/BBQ.yaml similarity index 91% rename from walledeval/data/presets/BBQ.yaml rename to walledeval/data/presets/walledai/BBQ.yaml index cf1ae585..cfff776b 100644 --- a/walledeval/data/presets/BBQ.yaml +++ b/walledeval/data/presets/walledai/BBQ.yaml @@ -1,4 +1,4 @@ -name: BBQ +name: walledai/BBQ split: - age - disabilityStatus diff --git a/walledeval/data/presets/walledai/BeaverTailsEval.yaml b/walledeval/data/presets/walledai/BeaverTailsEval.yaml new file mode 100644 index 00000000..a69e9bbb --- /dev/null +++ b/walledeval/data/presets/walledai/BeaverTailsEval.yaml @@ -0,0 +1,3 @@ +name: walledai/BeaverTailsEval +split: train +type: Prompt diff --git a/walledeval/data/presets/CBBQ.yaml b/walledeval/data/presets/walledai/CBBQ.yaml similarity index 92% rename from walledeval/data/presets/CBBQ.yaml rename to walledeval/data/presets/walledai/CBBQ.yaml index c564074c..93e81b23 100644 --- a/walledeval/data/presets/CBBQ.yaml +++ b/walledeval/data/presets/walledai/CBBQ.yaml @@ -1,4 +1,4 @@ -name: CBBQ +name: walledai/CBBQ split: - ses - age diff --git a/walledeval/data/presets/CDNA.yaml b/walledeval/data/presets/walledai/CDNA.yaml similarity index 56% rename from walledeval/data/presets/CDNA.yaml rename to walledeval/data/presets/walledai/CDNA.yaml index a037fa3f..64b1b1b4 100644 --- a/walledeval/data/presets/CDNA.yaml +++ b/walledeval/data/presets/walledai/CDNA.yaml @@ -1,3 +1,3 @@ -name: CDNA +name: walledai/CDNA split: train type: 
Prompt diff --git a/walledeval/data/presets/CPAD.yaml b/walledeval/data/presets/walledai/CPAD.yaml similarity index 56% rename from walledeval/data/presets/CPAD.yaml rename to walledeval/data/presets/walledai/CPAD.yaml index 5f914447..ab7a735a 100644 --- a/walledeval/data/presets/CPAD.yaml +++ b/walledeval/data/presets/walledai/CPAD.yaml @@ -1,3 +1,3 @@ -name: CPAD +name: walledai/CPAD split: train type: Prompt diff --git a/walledeval/data/presets/CatHarmfulQA.yaml b/walledeval/data/presets/walledai/CatHarmfulQA.yaml similarity index 59% rename from walledeval/data/presets/CatHarmfulQA.yaml rename to walledeval/data/presets/walledai/CatHarmfulQA.yaml index 45d5c665..0901271e 100644 --- a/walledeval/data/presets/CatHarmfulQA.yaml +++ b/walledeval/data/presets/walledai/CatHarmfulQA.yaml @@ -1,4 +1,4 @@ -name: CatHarmfulQA +name: walledai/CatHarmfulQA split: - en - zh diff --git a/walledeval/data/presets/CyberSecEval.yaml b/walledeval/data/presets/walledai/CyberSecEval.yaml similarity index 77% rename from walledeval/data/presets/CyberSecEval.yaml rename to walledeval/data/presets/walledai/CyberSecEval.yaml index 9b5cb1b8..778ad3ff 100644 --- a/walledeval/data/presets/CyberSecEval.yaml +++ b/walledeval/data/presets/walledai/CyberSecEval.yaml @@ -1,4 +1,4 @@ -name: CyberSecEval +name: walledai/CyberSecEval split: - python - php diff --git a/walledeval/data/presets/DELPHI.yaml b/walledeval/data/presets/walledai/DELPHI.yaml similarity index 54% rename from walledeval/data/presets/DELPHI.yaml rename to walledeval/data/presets/walledai/DELPHI.yaml index 767dd59d..55428eda 100644 --- a/walledeval/data/presets/DELPHI.yaml +++ b/walledeval/data/presets/walledai/DELPHI.yaml @@ -1,3 +1,3 @@ -name: DELPHI +name: walledai/DELPHI split: train type: Prompt diff --git a/walledeval/data/presets/DNA.yaml b/walledeval/data/presets/walledai/DNA.yaml similarity index 57% rename from walledeval/data/presets/DNA.yaml rename to walledeval/data/presets/walledai/DNA.yaml index 
53312a50..e20e3277 100644 --- a/walledeval/data/presets/DNA.yaml +++ b/walledeval/data/presets/walledai/DNA.yaml @@ -1,3 +1,3 @@ -name: DNA +name: walledai/DNA split: train type: Prompt diff --git a/walledeval/data/presets/DT-OOD.yaml b/walledeval/data/presets/walledai/DT-OOD.yaml similarity index 65% rename from walledeval/data/presets/DT-OOD.yaml rename to walledeval/data/presets/walledai/DT-OOD.yaml index 11322719..bcb12928 100644 --- a/walledeval/data/presets/DT-OOD.yaml +++ b/walledeval/data/presets/walledai/DT-OOD.yaml @@ -1,3 +1,3 @@ -name: DT-OOD +name: walledai/DT-OOD split: train type: MultipleChoiceQuestion diff --git a/walledeval/data/presets/walledai/DTStereotype.yaml b/walledeval/data/presets/walledai/DTStereotype.yaml new file mode 100644 index 00000000..6e5b549a --- /dev/null +++ b/walledeval/data/presets/walledai/DTStereotype.yaml @@ -0,0 +1,3 @@ +name: walledai/DTStereotype +split: train +type: Prompt diff --git a/walledeval/data/presets/DTToxicity.yaml b/walledeval/data/presets/walledai/DTToxicity.yaml similarity index 64% rename from walledeval/data/presets/DTToxicity.yaml rename to walledeval/data/presets/walledai/DTToxicity.yaml index 101e3ea2..c8b3db69 100644 --- a/walledeval/data/presets/DTToxicity.yaml +++ b/walledeval/data/presets/walledai/DTToxicity.yaml @@ -1,4 +1,4 @@ -name: DTToxicity +name: walledai/DTToxicity split: - rtp - gpt3.5 diff --git a/walledeval/data/presets/walledai/ForbiddenQuestions.yaml b/walledeval/data/presets/walledai/ForbiddenQuestions.yaml new file mode 100644 index 00000000..eb1fef6e --- /dev/null +++ b/walledeval/data/presets/walledai/ForbiddenQuestions.yaml @@ -0,0 +1,3 @@ +name: walledai/ForbiddenQuestions +split: train +type: Prompt diff --git a/walledeval/data/presets/HarmBench.yaml b/walledeval/data/presets/walledai/HarmBench.yaml similarity index 50% rename from walledeval/data/presets/HarmBench.yaml rename to walledeval/data/presets/walledai/HarmBench.yaml index e2e3a939..791f270f 100644 --- 
a/walledeval/data/presets/HarmBench.yaml +++ b/walledeval/data/presets/walledai/HarmBench.yaml @@ -1,3 +1,3 @@ -name: HarmBench +name: walledai/HarmBench split: train type: Prompt diff --git a/walledeval/data/presets/walledai/JailbreakBench.yaml b/walledeval/data/presets/walledai/JailbreakBench.yaml new file mode 100644 index 00000000..d74779e1 --- /dev/null +++ b/walledeval/data/presets/walledai/JailbreakBench.yaml @@ -0,0 +1,3 @@ +name: walledai/JailbreakBench +split: train +type: Prompt diff --git a/walledeval/data/presets/walledai/JailbreakHub.yaml b/walledeval/data/presets/walledai/JailbreakHub.yaml new file mode 100644 index 00000000..5b31035e --- /dev/null +++ b/walledeval/data/presets/walledai/JailbreakHub.yaml @@ -0,0 +1,3 @@ +name: walledai/JailbreakHub +split: train +type: Prompt diff --git a/walledeval/data/presets/walledai/MaliciousInstruct.yaml b/walledeval/data/presets/walledai/MaliciousInstruct.yaml new file mode 100644 index 00000000..77e80835 --- /dev/null +++ b/walledeval/data/presets/walledai/MaliciousInstruct.yaml @@ -0,0 +1,3 @@ +name: walledai/MaliciousInstruct +split: train +type: Prompt diff --git a/walledeval/data/presets/RTP.yaml b/walledeval/data/presets/walledai/RTP.yaml similarity index 57% rename from walledeval/data/presets/RTP.yaml rename to walledeval/data/presets/walledai/RTP.yaml index 6d76cb71..5a15a5c3 100644 --- a/walledeval/data/presets/RTP.yaml +++ b/walledeval/data/presets/walledai/RTP.yaml @@ -1,3 +1,3 @@ -name: RTP +name: walledai/RTP split: train type: Prompt diff --git a/walledeval/data/presets/walledai/SGSafetyQuestions.yaml b/walledeval/data/presets/walledai/SGSafetyQuestions.yaml new file mode 100644 index 00000000..9df461e9 --- /dev/null +++ b/walledeval/data/presets/walledai/SGSafetyQuestions.yaml @@ -0,0 +1,3 @@ +name: walledai/SGSafetyQuestions +split: train +type: Prompt diff --git a/walledeval/data/presets/SafeText.yaml b/walledeval/data/presets/walledai/SafeText.yaml similarity index 61% rename from 
walledeval/data/presets/SafeText.yaml rename to walledeval/data/presets/walledai/SafeText.yaml index ec82ac53..bc11b8ce 100644 --- a/walledeval/data/presets/SafeText.yaml +++ b/walledeval/data/presets/walledai/SafeText.yaml @@ -1,3 +1,3 @@ -name: SafeText +name: walledai/SafeText split: train type: AutocompletePrompt diff --git a/walledeval/data/presets/walledai/SafetyDatasets.yaml b/walledeval/data/presets/walledai/SafetyDatasets.yaml new file mode 100644 index 00000000..fbc06d5f --- /dev/null +++ b/walledeval/data/presets/walledai/SafetyDatasets.yaml @@ -0,0 +1,3 @@ +name: walledai/SafetyDatasets +split: train +type: Prompt diff --git a/walledeval/data/presets/SaladBench.yaml b/walledeval/data/presets/walledai/SaladBench.yaml similarity index 62% rename from walledeval/data/presets/SaladBench.yaml rename to walledeval/data/presets/walledai/SaladBench.yaml index 315aed30..d0ca576f 100644 --- a/walledeval/data/presets/SaladBench.yaml +++ b/walledeval/data/presets/walledai/SaladBench.yaml @@ -1,3 +1,3 @@ -name: SaladBench +name: walledai/SaladBench split: train type: MultipleResponseQuestion diff --git a/walledeval/data/presets/SimpleSafetyTests.yaml b/walledeval/data/presets/walledai/SimpleSafetyTests.yaml similarity index 56% rename from walledeval/data/presets/SimpleSafetyTests.yaml rename to walledeval/data/presets/walledai/SimpleSafetyTests.yaml index f76b1caf..3cd61b5c 100644 --- a/walledeval/data/presets/SimpleSafetyTests.yaml +++ b/walledeval/data/presets/walledai/SimpleSafetyTests.yaml @@ -1,4 +1,4 @@ -name: SimpleSafetyTests +name: walledai/SimpleSafetyTests split: - info - instruct diff --git a/walledeval/data/presets/walledai/StrongREJECT.yaml b/walledeval/data/presets/walledai/StrongREJECT.yaml new file mode 100644 index 00000000..b2536595 --- /dev/null +++ b/walledeval/data/presets/walledai/StrongREJECT.yaml @@ -0,0 +1,3 @@ +name: walledai/StrongREJECT +split: train +type: Prompt diff --git a/walledeval/data/presets/walledai/TDC23-RedTeaming.yaml 
b/walledeval/data/presets/walledai/TDC23-RedTeaming.yaml new file mode 100644 index 00000000..520170bc --- /dev/null +++ b/walledeval/data/presets/walledai/TDC23-RedTeaming.yaml @@ -0,0 +1,3 @@ +name: walledai/TDC23-RedTeaming +split: train +type: Prompt diff --git a/walledeval/data/presets/TET.yaml b/walledeval/data/presets/walledai/TET.yaml similarity index 57% rename from walledeval/data/presets/TET.yaml rename to walledeval/data/presets/walledai/TET.yaml index e70a2e40..fbf6e6b6 100644 --- a/walledeval/data/presets/TET.yaml +++ b/walledeval/data/presets/walledai/TET.yaml @@ -1,3 +1,3 @@ -name: TET +name: walledai/TET split: train type: Prompt diff --git a/walledeval/data/presets/WMDP.yaml b/walledeval/data/presets/walledai/WMDP.yaml similarity index 75% rename from walledeval/data/presets/WMDP.yaml rename to walledeval/data/presets/walledai/WMDP.yaml index 00c96eb8..7008250f 100644 --- a/walledeval/data/presets/WMDP.yaml +++ b/walledeval/data/presets/walledai/WMDP.yaml @@ -1,4 +1,4 @@ -name: WMDP +name: walledai/WMDP split: - bio - chem diff --git a/walledeval/data/presets/walledai/WildGuardTest.yaml b/walledeval/data/presets/walledai/WildGuardTest.yaml new file mode 100644 index 00000000..8acbd2b7 --- /dev/null +++ b/walledeval/data/presets/walledai/WildGuardTest.yaml @@ -0,0 +1,3 @@ +name: walledai/WildGuardTest +split: train +type: Prompt diff --git a/walledeval/data/presets/walledai/WildJailbreak.yaml b/walledeval/data/presets/walledai/WildJailbreak.yaml new file mode 100644 index 00000000..4d3faff9 --- /dev/null +++ b/walledeval/data/presets/walledai/WildJailbreak.yaml @@ -0,0 +1,3 @@ +name: walledai/WildJailbreak +split: train +type: Prompt diff --git a/walledeval/data/presets/XSTest.yaml b/walledeval/data/presets/walledai/XSTest.yaml similarity index 53% rename from walledeval/data/presets/XSTest.yaml rename to walledeval/data/presets/walledai/XSTest.yaml index 2e579b82..8822ecb9 100644 --- a/walledeval/data/presets/XSTest.yaml +++ 
b/walledeval/data/presets/walledai/XSTest.yaml @@ -1,3 +1,3 @@ -name: XSTest +name: walledai/XSTest split: test type: Prompt