From 99c7d45845a681831b4ff6e869b831e3f81212e1 Mon Sep 17 00:00:00 2001 From: Mandana Vaziri Date: Mon, 9 Sep 2024 15:59:04 -0400 Subject: [PATCH] tutorial examples (#25) Signed-off-by: Mandana Vaziri --- examples/arith/Arith-new.pdl | 66 ++--- examples/arith/Arith-simple.pdl | 41 +-- examples/fibonacci/fib.pdl | 8 +- examples/roles/chatbot.pdl | 25 ++ examples/roles/marketing.pdl | 14 ++ examples/talk/1-hello.pdl | 3 + examples/talk/10-tools.pdl | 24 ++ examples/talk/11-react.pdl | 50 ++++ examples/talk/12-multiagent.pdl | 38 +++ examples/talk/2-hello.pdl | 8 + examples/talk/3-hello.pdl | 13 + examples/talk/4-hello.pdl | 17 ++ examples/talk/5-hello.pdl | 20 ++ examples/talk/6-chatbot.pdl | 21 ++ examples/talk/7-chatbot-roles.pdl | 36 +++ examples/talk/8-code-eval.pdl | 37 +++ examples/talk/9-sdg.pdl | 397 ++++++++++++++++++++++++++++++ examples/talk/data.yaml | 16 ++ examples/talk/ground_truth.txt | 3 + examples/talk/qna.yaml | 15 ++ 20 files changed, 799 insertions(+), 53 deletions(-) create mode 100644 examples/roles/chatbot.pdl create mode 100644 examples/roles/marketing.pdl create mode 100644 examples/talk/1-hello.pdl create mode 100644 examples/talk/10-tools.pdl create mode 100644 examples/talk/11-react.pdl create mode 100644 examples/talk/12-multiagent.pdl create mode 100644 examples/talk/2-hello.pdl create mode 100644 examples/talk/3-hello.pdl create mode 100644 examples/talk/4-hello.pdl create mode 100644 examples/talk/5-hello.pdl create mode 100644 examples/talk/6-chatbot.pdl create mode 100644 examples/talk/7-chatbot-roles.pdl create mode 100644 examples/talk/8-code-eval.pdl create mode 100644 examples/talk/9-sdg.pdl create mode 100644 examples/talk/data.yaml create mode 100644 examples/talk/ground_truth.txt create mode 100644 examples/talk/qna.yaml diff --git a/examples/arith/Arith-new.pdl b/examples/arith/Arith-new.pdl index e025429c..6850992e 100644 --- a/examples/arith/Arith-new.pdl +++ b/examples/arith/Arith-new.pdl @@ -3,37 +3,43 @@ document: - 
read: examples/arith/example1.txt - read: examples/arith/example2.txt - repeat: - - "\nQuestion: " - - def: QUESTION - model: ibm/granite-20b-code-instruct - params: - stop_sequences: - - Answer - include_stop_sequence: false - - "Answer: Let's think step by step.\n" - - repeat: - - def: REASON_OR_CALC - model: ibm/granite-20b-code-instruct + document: + - "\nQuestion: " + - def: QUESTION + model: ibm/granite-34b-code-instruct params: stop_sequences: - - '<<' + - Answer + - "?" include_stop_sequence: true - - if: '{{ REASON_OR_CALC.endswith("<<") }}' - then: - - def: EXPR - model: ibm/granite-20b-code-instruct - params: - stop_sequences: - - '=' - - "\n" - - "Question" - include_stop_sequence: false - - '= ' - - def: RESULT - lan: python - code: result = {{ EXPR }} - - ' >>' - until: '{{ "The answer is" in REASON_OR_CALC }}' - - "\n\n" + - "\nAnswer: Let's think step by step.\n" + - repeat: + document: + - def: REASON_OR_CALC + model: ibm/granite-34b-code-instruct + params: + stop_sequences: + - '<<' + - "Question" + include_stop_sequence: true + - if: '{{ REASON_OR_CALC.endswith("<<") }}' + then: + document: + - def: EXPR + model: ibm/granite-34b-code-instruct + params: + stop_sequences: + - '=' + - "\n" + - "Question" + include_stop_sequence: false + - '= ' + - def: RESULT + lan: python + code: result = {{ EXPR }} + - ' >>' + until: '{{ "The answer is" in REASON_OR_CALC }}' + as: document + - "\n\n" as: document - num_iterations: 3 + num_iterations: 3 \ No newline at end of file diff --git a/examples/arith/Arith-simple.pdl b/examples/arith/Arith-simple.pdl index 88fafb35..a90498fd 100644 --- a/examples/arith/Arith-simple.pdl +++ b/examples/arith/Arith-simple.pdl @@ -8,26 +8,29 @@ document: Last month he sold four large boxes and two small boxes. If he sold half as much this month, how much is his sales for this month? 
- repeat: - - def: REASON_OR_CALC - model: ibm/granite-20b-code-instruct - params: - stop_sequences: - - '<<' - include_stop_sequence: true - - if: '{{ REASON_OR_CALC.endswith("<<") }}' - then: - - def: EXPR + document: + - def: REASON_OR_CALC model: ibm/granite-20b-code-instruct params: stop_sequences: - - '=' - - "\n" - - "Question" - include_stop_sequence: false - - '= ' - - def: RESULT - lan: python - code: result = {{ EXPR }} - - ' >>' + - '<<' + include_stop_sequence: true + - if: '{{ REASON_OR_CALC.endswith("<<") }}' + then: + document: + - def: EXPR + model: ibm/granite-20b-code-instruct + params: + stop_sequences: + - '=' + - "\n" + - "Question" + include_stop_sequence: false + - '= ' + - def: RESULT + lan: python + code: result = {{ EXPR }} + - ' >>' + as: document until: '{{ "The answer is" in REASON_OR_CALC }}' -- "\n" +- "\n" \ No newline at end of file diff --git a/examples/fibonacci/fib.pdl b/examples/fibonacci/fib.pdl index ab3dedbd..9235d811 100644 --- a/examples/fibonacci/fib.pdl +++ b/examples/fibonacci/fib.pdl @@ -2,7 +2,7 @@ description: Fibonacci document: - def: CODE model: ibm/granite-20b-code-instruct - input: "Write just the definition for an efficient Python function to compute the Fibonacci sequence\n\n" + input: "Write an optimized Python function to compute the Fibonacci sequence\n\n" params: include_stop_sequence: false - "\nFind a random number between 1 and 20\n" @@ -12,17 +12,17 @@ document: import random result = random.randint(1, 20) show_result: true -- "\nNow computing fibonacci(" +- "\nNow computing fib(" - get: N - ")\n" - def: RESULT lan: python code: | {{ CODE }} - result = fibonacci({{ N }}) + result = fib({{ N }}) show_result: false - 'The result is: ' - get: RESULT - "\n\nExplain what the above code does and what the result means\n\n" - model: ibm/granite-20b-code-instruct -- "\n" +- "\n" \ No newline at end of file diff --git a/examples/roles/chatbot.pdl b/examples/roles/chatbot.pdl new file mode 100644 index 
00000000..ed09ed18 --- /dev/null +++ b/examples/roles/chatbot.pdl @@ -0,0 +1,25 @@ +description: basic chatbot +document: +- role: system + document: + - | + You are Granite Chat, an AI language model developed by the IBM DMF Alignment Team. + You are a cautious assistant that carefully follows instructions. + You are helpful and harmless and you follow ethical guidelines and promote positive behavior. + You respond in a comprehensive manner unless instructed otherwise, providing explanations when needed while + maintaining a neutral tone. You are capable of coding, writing, and roleplaying. You are cautious and + refrain from generating real-time information, highly subjective or opinion-based topics. + You are harmless and refrain from generating content involving any form of bias, violence, + discrimination or inappropriate content. You always respond to greetings (for example, hi, hello, g'day, + morning, afternoon, evening, night, what's up, nice to meet you, sup, etc) with "Hello! I am Granite Chat, + created by the IBM DMF Alignment Team. How can I help you today?". Please do not say anything else + and do not start a conversation. +- repeat: + - role: user + read: + message: "" + def: query + - role: assistant + model: ibm/granite-13b-chat-v2 + - "\n" + until: "{{ query == 'stop'}}" \ No newline at end of file diff --git a/examples/roles/marketing.pdl b/examples/roles/marketing.pdl new file mode 100644 index 00000000..37d8a5b8 --- /dev/null +++ b/examples/roles/marketing.pdl @@ -0,0 +1,14 @@ +description: marketing email with roles +document: +- role: system + document: + - | + You are Granite Chat, an AI language model developed by IBM. You are a cautious assistant. + You carefully follow instructions. You are helpful and harmless and you follow ethical + guidelines and promote positive behavior. 
+- role: user + document: + " {{ instructions }}" +- role: assistant + document: + - model: ibm/granite-13b-chat-v2 \ No newline at end of file diff --git a/examples/talk/1-hello.pdl b/examples/talk/1-hello.pdl new file mode 100644 index 00000000..828212a6 --- /dev/null +++ b/examples/talk/1-hello.pdl @@ -0,0 +1,3 @@ +description: My first PDL program +document: +- Hello, World! diff --git a/examples/talk/10-tools.pdl b/examples/talk/10-tools.pdl new file mode 100644 index 00000000..b7c07fca --- /dev/null +++ b/examples/talk/10-tools.pdl @@ -0,0 +1,24 @@ +document: +- |- + What is 18 + 12 x 3? + Act: {"name": "Calc", "arguments": {"expr": "18 + 12 * 3"}} + Obs: 54 + + A total of 252 qualifying matches were played, and 723 goals were scored. What was the average number of goals per match? + Act: {"name": "Calc", "arguments": {"expr": "723 / 252"}} + Obs: 2.869047619047619 + + Out of 1400 participants, 400 passed the test. What percentage is that? + Act: +- def: action + model: ibm/granite-8b-code-instruct + params: + stop_sequences: ["\n"] + parser: json + spec: {name: str, arguments: {expr: str}} +- if: '{{ action.name == "Calc" }}' + then: + - "Obs: " + - lan: python + code: result = {{ action.arguments.expr }} +- "\n" \ No newline at end of file diff --git a/examples/talk/11-react.pdl b/examples/talk/11-react.pdl new file mode 100644 index 00000000..78a61142 --- /dev/null +++ b/examples/talk/11-react.pdl @@ -0,0 +1,50 @@ +document: +- | + What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into? + Tho: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado ... + Act: {"name": "Search", "arguments": {"topic": "Colorado orogeny"}} + Obs: The Colorado orogeny was an episode of mountain building (an orogeny) ... + Tho: It does not mention the eastern sector. So I need to look up eastern sector. 
+ Tho: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft. + Act: {"name": "Finish", "arguments": {"topic": "1,800 to 7,000 ft"}} + + What profession does Nicholas Ray and Elia Kazan have in common? + Tho: I need to search Nicholas Ray and Elia Kazan, find their professions, then find the profession they have in common. + Act: {"name": "Search", "arguments": {"topic": "Nicholas Ray"}} + Obs: Nicholas Ray (born Raymond Nicholas Kienzle Jr., August 7, 1911 - June 16, 1979) was an American film director, screenwriter, and actor best known for the 1955 film Rebel Without a Cause. + Tho: Professions of Nicholas Ray are director, screenwriter, and actor. I need to search Elia Kazan next and find his professions. + Act: {"name": "Search", "arguments": {"topic": "Elia Kazan"}} + Obs: Elia Kazan was an American film and theatre director, producer, screenwriter and actor. + Tho: Professions of Elia Kazan are director, producer, screenwriter, and actor. So profession Nicholas Ray and Elia Kazan have in common is director, screenwriter, and actor. + Act: {"name": "Finish", "arguments": {"topic": "director, screenwriter, actor"}} + + When was the discoverer of the Hudson River born? 
+- repeat: + document: + - def: thought + model: ibm/granite-34b-code-instruct + params: + STOP_SEQUENCES: ["Act:"] + INCLUDE_STOP_SEQUENCE: true + - def: action + model: ibm/granite-34b-code-instruct + params: + STOP_SEQUENCES: ["\n"] + parser: json + spec: {name: str, arguments: {topic: str}} + - def: observation + if: '{{ action.name == "Search" }}' + then: + document: + - "Obs: " + - lan: python + code: | + import warnings, wikipedia + warnings.simplefilter("ignore") + try: + result = wikipedia.summary("{{ action.arguments.topic }}") + except wikipedia.WikipediaException as e: + result = str(e) + until: '{{ action.name != "Search" }}' + as: document +- "\n" \ No newline at end of file diff --git a/examples/talk/12-multiagent.pdl b/examples/talk/12-multiagent.pdl new file mode 100644 index 00000000..5a922260 --- /dev/null +++ b/examples/talk/12-multiagent.pdl @@ -0,0 +1,38 @@ +document: +- include: ../prompt_library/tools.pdl +- include: ../prompt_library/ReAct.pdl +- include: ../granite/granite_defs.pdl +- def: filtered_tools + call: filter_tools_by_name + show_result: false + args: + tools: "{{ default_tools }}" + tool_names: ["Search"] +- def: QUESTION + read: + message: "Please enter a question: " +- def: PROPOSED + call: react_json + args: + context: + - role: system + content: "{{ granite_models.granite_7b_lab.system_prompt }}" + question: "{{ QUESTION }}" + model: ibm/granite-34b-code-instruct + tools: "{{ filtered_tools }}" + trajectories: [] +- "\n\n----- Verifying answer... -----\n\n" +- def: VERIFIED + call: react_json + args: + context: [{"role": "system", "content": ""}] + question: |- + Is this the right answer to this question? + "{{ QUESTION }}" + Proposed answer: {{ PROPOSED.answer.topic }} + + Please answer as True or False. 
+ model: ibm/granite-34b-code-instruct + tools: "{{ filtered_tools }}" + trajectories: [] +- "\n\nThe answer '{{ PROPOSED.answer.topic }}' has been verified as '{{VERIFIED.answer.topic}}'.\n" \ No newline at end of file diff --git a/examples/talk/2-hello.pdl b/examples/talk/2-hello.pdl new file mode 100644 index 00000000..8cbee00f --- /dev/null +++ b/examples/talk/2-hello.pdl @@ -0,0 +1,8 @@ +description: My first PDL program +document: +- Hello, +- model: ibm/granite-34b-code-instruct + params: + stop_sequences: ["!"] + include_stop_sequence: true +- "\n" diff --git a/examples/talk/3-hello.pdl b/examples/talk/3-hello.pdl new file mode 100644 index 00000000..62ed8e52 --- /dev/null +++ b/examples/talk/3-hello.pdl @@ -0,0 +1,13 @@ +description: My first PDL program +document: +- Hello, +- model: ibm/granite-34b-code-instruct + params: + stop_sequences: ["!"] + include_stop_sequence: true +- "\nTranslate this to French\n" +- model: ibm/granite-20b-multilingual + params: + stop_sequences: ["!"] + include_stop_sequence: true +- "\n" \ No newline at end of file diff --git a/examples/talk/4-hello.pdl b/examples/talk/4-hello.pdl new file mode 100644 index 00000000..783c6c2d --- /dev/null +++ b/examples/talk/4-hello.pdl @@ -0,0 +1,17 @@ +description: My first PDL program +document: +- Hello, +- model: ibm/granite-34b-code-instruct + def: name + params: + stop_sequences: ["!"] + include_stop_sequence: false +- "\n" +- model: ibm/granite-20b-multilingual + input: + document: + - "Translate the word '{{ name | trim }}' to French\n" + params: + stop_sequences: ["!"] + include_stop_sequence: true +- "\n" \ No newline at end of file diff --git a/examples/talk/5-hello.pdl b/examples/talk/5-hello.pdl new file mode 100644 index 00000000..4c70e982 --- /dev/null +++ b/examples/talk/5-hello.pdl @@ -0,0 +1,20 @@ +document: +- def: translate + function: + sentence: str + language: str + return: + - "\nTranslate the sentence '{{ sentence }}' to {{ language }}.\n" + - model: 
ibm/granite-20b-multilingual + params: + stop_sequences: ["\n"] +- call: translate + args: + sentence: I love Paris! + language: French +- "\n" +- call: translate + args: + sentence: I love Madrid! + language: Spanish +- "\n" \ No newline at end of file diff --git a/examples/talk/6-chatbot.pdl b/examples/talk/6-chatbot.pdl new file mode 100644 index 00000000..40e626a9 --- /dev/null +++ b/examples/talk/6-chatbot.pdl @@ -0,0 +1,21 @@ +description: chatbot +document: +- read: + def: query + message: "What is your query?\n" +- repeat: + document: + - model: ibm/granite-13b-chat-v2 + spec: str + params: + max_new_tokens: 200 + - read: + def: eval + message: "\nIs this a good answer[yes/no]?\n" + show_result: false + - if: "{{ eval == 'no' }}" + then: + - read: + message: "Why not?\n" + as: document + until: "{{ eval == 'yes'}}" \ No newline at end of file diff --git a/examples/talk/7-chatbot-roles.pdl b/examples/talk/7-chatbot-roles.pdl new file mode 100644 index 00000000..8f5a6882 --- /dev/null +++ b/examples/talk/7-chatbot-roles.pdl @@ -0,0 +1,36 @@ +description: chatbot +document: +- role: system + document: + - | + You are Granite Chat, an AI language model developed by the IBM DMF Alignment Team. + You are a cautious assistant that carefully follows instructions. + You are helpful and harmless and you follow ethical guidelines and promote positive behavior. + You respond in a comprehensive manner unless instructed otherwise, providing explanations when needed while + maintaining a neutral tone. You are capable of coding, writing, and roleplaying. You are cautious and + refrain from generating real-time information, highly subjective or opinion-based topics. + You are harmless and refrain from generating content involving any form of bias, violence, + discrimination or inappropriate content. You always respond to greetings (for example, hi, hello, g'day, + morning, afternoon, evening, night, what's up, nice to meet you, sup, etc) with "Hello! 
I am Granite Chat, + created by the IBM DMF Alignment Team. How can I help you today?". Please do not say anything else + and do not start a conversation. +- read: + message: "What is your query?\n" +- repeat: + document: + - model: ibm/granite-13b-chat-v2 + role: assistant + spec: str + params: + max_new_tokens: 200 + - read: + def: eval + message: "\nIs this a good answer[yes/no]?\n" + show_result: false + - if: "{{ eval == 'no' }}" + then: + - read: + message: "Why not?\n" + as: document + until: "{{ eval == 'yes'}}" +role: user \ No newline at end of file diff --git a/examples/talk/8-code-eval.pdl b/examples/talk/8-code-eval.pdl new file mode 100644 index 00000000..0cac2ad8 --- /dev/null +++ b/examples/talk/8-code-eval.pdl @@ -0,0 +1,37 @@ +description: Code explanation example +document: +- read: ./data.yaml + parser: yaml + def: CODE + show_result: False +- | + Here is some info about the location of the function in the repo. + repo: {{ CODE.repo_info.repo }} + path: {{ CODE.repo_info.path }} + Function_name: {{ CODE.repo_info.function_name }} + + + Explain the following code: + ``` + {{ CODE.source_code }}``` +- model: ibm/granite-34b-code-instruct + def: EXPLANATION +- read: ./ground_truth.txt + def: TRUTH + show_result: False +- | + + EVALUATION: + The similarity (Levenshtein) between this answer and the ground truth is: +- def: EVAL + lan: python + code: | + import textdistance + expl = """ + {{ EXPLANATION }} + """ + truth = """ + {{ TRUTH }} + """ + result = textdistance.levenshtein.normalized_similarity(expl, truth) +- "\n" \ No newline at end of file diff --git a/examples/talk/9-sdg.pdl b/examples/talk/9-sdg.pdl new file mode 100644 index 00000000..ddd3ee92 --- /dev/null +++ b/examples/talk/9-sdg.pdl @@ -0,0 +1,397 @@ +defs: + teacher_sys_prompt: You are a very knowledgeable AI Assistant that will faithfully assist the user with their task. 
+ teacher_model: mistralai/mixtral-8x7b-instruct-v01 + teacher_template: + function: + sys_prompt: str + prompt: str + return: [INST] {{sys_prompt}} {{prompt}} [/INST] + teacher_stop_token: </s> + + + question_template_freeform: + function: + num_samples: int + task_description: str + icl_question: str + spec: { introduction: str, principles: str, examples: str, generation: str, max_new_tokens: int } + return: + data: + introduction: | + You are asked to come up with a set of {{num_samples}} diverse questions - {{task_description}}. + principles: | + Please follow these guiding principles when generating responses: + * Use proper grammar and punctuation. + * Always generate safe and respectful content. Do not generate content that is harmful, abusive, or offensive. + * Always generate content that is factually accurate and relevant to the prompt. + * The questions should be clear and human-like. + * The questions should be diverse and cover a wide range of topics. + * The questions should not be template-based or generic, it should be very diverse. + * Simply return the questions, do not return any answers or explanations. + * Strictly adhere to the prompt and generate responses in the same style and format as the example. + Use this format to generate the questions: + ### Question 1: + examples: | + To better assist you with this task, here is an example: + ### Question 1: {{icl_question}} + generation: | + Now generate {{num_samples}} such questions, remember to follow the principles mentioned above and use the same format as the examples. Remember to use the same style and format as the example above.
+ max_new_tokens: 10000 + + gen_questions_freeform_inner: + function: + num_samples: int + task_description: str + icl_question: str + icl_answer: str + spec: [{icl_question: str, icl_answer: str, question: str}] + return: + - defs: + prompt_data: + call: question_template_freeform + spec: { introduction: str, principles: str, examples: str, generation: str, max_new_tokens: int } + args: + num_samples: "{{num_samples}}" + task_description: "{{task_description}}" + icl_question: "{{icl_question}}" + teacher_input: + call: teacher_template + args: + sys_prompt: "{{teacher_sys_prompt}}" + prompt: |- + {{prompt_data.introduction}} + {{prompt_data.principles}} + {{prompt_data.examples}} + {{prompt_data.generation}} + teacher_output: + model: "{{teacher_model}}" + input: "{{teacher_input}}" + params: + STOP_SEQUENCES: ["{{teacher_stop_token}}"] + INCLUDE_STOP_SEQUENCE: false + MAX_NEW_TOKENS: "{{prompt_data.max_new_tokens}}" + parser: + regex: '### Question [0-9]+:\s*([^#\n]+)' + mode: findall + - for: + question: "{{teacher_output}}" + repeat: + data: + icl_question: "{{icl_question}}" + icl_answer: "{{icl_answer}}" + question: "{{question}}" + + gen_questions_freeform: + function: + task_description: str + seed_examples: [{question: str, answer: str}] + spec: [{icl_question: str, icl_answer: str, question: str}] + return: + - defs: + list_of_lists: + for: + example: "{{seed_examples}}" + repeat: + call: gen_questions_freeform_inner + args: + num_samples: 2 + task_description: "{{task_description}}" + icl_question: "{{example.question}}" + icl_answer: "{{example.answer}}" + - lan: python + code: | # flatten list_of_lists into simple list + result = [q for qs in {{list_of_lists}} for q in qs] + + + filter_questions_template: + function: + task_description: str + question: str + spec: {introduction: str, principles: str, generation: str, max_new_tokens: int} + return: + data: + introduction: | + Please act as an impartial judge and evaluate the questions generated by an 
AI assistant displayed below. Evaluate whether or not the question is a good question of how AI Assistant should respond to the user's instruction. Please assign a score using a binary 0/1 scale. + principles: | + Here are the requirements: + * A large language model should be able to complete the question. For example, do not ask the assistant to create any visual or audio output. For another example, do not ask the assistant to wake you up at 5pm or set a reminder because it cannot perform any action. + * The questions should be in English. + * The questions should be 1 to 2 sentences long and should be properly formatted. + * The question should not be offensive, abusive, or harmful. It should be safe and respectful. + * The question should be relevant to the task given - {{task_description}}. + If the question meets the above requirements, please rate it 1. If not, please rate it 0. + generation: | + Begin your evaluation by providing a short explanation. Be as objective as possible. 
After providing your explanation, you must rate the question on a scale of 0 or 1 as mentioned above by strictly following this format: \"[[rating]]\", for example: \"Rating: [[1]]\" + Here is the question you need to evaluate: + {{question}} + max_new_tokens: 256 + + # https://github.com/instruct-lab/datagen-pipeline/blob/main/sdg/filter_questions.py + filter_questions_inner: + function: + task_description: str + question: str + spec: float + return: + - defs: + prompt_data: + call: filter_questions_template + spec: {introduction: str, principles: str, generation: str, max_new_tokens: int} + args: + task_description: "{{task_description}}" + question: "{{question}}" + teacher_input: + call: teacher_template + args: + sys_prompt: "{{teacher_sys_prompt}}" + prompt: |- + {{prompt_data.introduction}} + {{prompt_data.principles}} + {{prompt_data.generation}} + teacher_output: + model: "{{teacher_model}}" + input: "{{teacher_input}}" + params: + STOP_SEQUENCES: ["{{teacher_stop_token}}"] + INCLUDE_STOP_SEQUENCE: false + MAX_NEW_TOKENS: "{{prompt_data.max_new_tokens}}" + parser: + spec: { "rating": str } + # regex: "Rating.*\\[\\[(?P<rating>\\d+\\.?\\d*)\\]\\]" + regex: 'Rating.*\[\[(?P<rating>\d+\.?\d*)\]\]' + mode: search + - "{{ teacher_output.rating | float }}" + + filter_questions: + function: + task_description: str + questions: [{icl_question: str, icl_answer: str, question: str}] + spec: [{icl_question: str, icl_answer: str, question: str}] + return: + - defs: + list_of_pairs: + for: + question: "{{questions}}" + repeat: + - def: filter_output + call: filter_questions_inner + args: + task_description: "{{task_description}}" + question: "{{question.question}}" + - data: + question: "{{question}}" + keep: "{{filter_output}}" + filtered: + lan: python + code: | # keep only if "keep" column is non-zero + result = [p["question"] for p in {{list_of_pairs}} if p["keep"]] + - "{{filtered}}" + + + answer_template: + function: + icl_question: str + icl_response: str + question: str +
spec: {introduction: str, principles: str, examples: str, generation: str, max_new_tokens: int, additional_stop_tokens: [str]} + return: + data: + introduction: Your task is to faithfully follow the user's prompt and generate a response. + principles: | + Please follow these guiding principles when generating responses: + * Use proper grammar and punctuation. + * Always generate safe and respectful content. Do not generate content that is harmful, abusive, or offensive. + * Always generate content that is factually accurate and relevant to the prompt. + * Strictly adhere to the prompt and generate responses in the same style and format as the example. + examples: | + To better assist you with this task, here is an example: + [Question] + {{icl_question}} + [Response] + {{icl_response}} + generation: | + Now generate a response to the following prompt. Remember to use the same style and format as the example above. + [Question] + {{question}} + [Response] + max_new_tokens: 2048 + additional_stop_tokens: + - "[Question]" + + + gen_answers_inner: + function: + question: {icl_question: str, icl_answer: str, question: str} + spec: {question: str, answer: str} + return: + - defs: + prompt_data: + call: answer_template + spec: {introduction: str, principles: str, examples: str, generation: str, max_new_tokens: int, additional_stop_tokens: [str]} + args: + icl_question: "{{question.icl_question}}" + icl_response: "{{question.icl_answer}}" + question: "{{question.question}}" + teacher_input: + call: teacher_template + args: + sys_prompt: "{{teacher_sys_prompt}}" + prompt: |- + {{prompt_data.introduction}} + {{prompt_data.principles}} + {{prompt_data.examples}} + {{prompt_data.generation}} + teacher_output: + model: "{{teacher_model}}" + input: "{{teacher_input}}" + params: + STOP_SEQUENCES: "{{ [teacher_stop_token] + prompt_data.additional_stop_tokens }}" + INCLUDE_STOP_SEQUENCE: false + MAX_NEW_TOKENS: "{{prompt_data.max_new_tokens}}" + parsed_answer: + lan: python + code: 
| # parse model output + result = """ {{teacher_output}} """.strip() + if "[Response]" in result: + result = result[result.find("[Response]") + len("[Response]"):] + if "[Question]" in result: + result = result[:result.find("[Question]")] + - data: + question: "{{question.question}}" + answer: "{{parsed_answer}}" + + gen_answers: + function: + questions: [{icl_question: str, icl_answer: str, question: str}] + spec: [{question: str, answer: str}] + return: + - defs: + all_results: + spec: [{question: str, answer: str}] + for: + question: "{{ questions }}" + repeat: + call: gen_answers_inner + args: + question: "{{question}}" + - lan: python + spec: [{question: str, answer: str}] + code: | # keep only if answer non-empty + result = [r for r in {{all_results}} if len(r["answer"]) > 0] + + + filter_qa_template: + function: + question: str + answer: str + spec: {introduction: str, principles: str, generation: str, max_new_tokens: int} + return: + data: + introduction: | + Please act as an impartial judge and evaluate the quality of the answer provided by an AI assistant to the questions displayed below. Evaluate whether or not the answer is a good example of how AI Assistant should respond to the user's instruction. Please assign a score using the following 3-point scale. + principles: | + 1: It means the answer is incorrect, irrelevant, unsafe or provides incomplete and garbage information. For instance, the answer may be factually wrong, off-topic, or filled with irrelevant content that doesn't address the user's question or it could be incomplete and hanging. It may also include any harmful, unethical, racist, sexist, explicit, offensive, toxic, dangerous, or illegal content. + + 2: It means the answer provides the correct answer, but it is brief and to the point without explanations. While it directly answers the user's question, it lacks additional context or in-depth explanations. + + 3: It means the answer is a perfect answer from an AI Assistant. 
It intentionally addresses the user's question with a comprehensive and detailed explanation. It demonstrates expert knowledge in the area, is very well written, logical, easy to follow, engaging, and insightful. And the answer is safe and does not include any harmful content. + generation: | + Begin your evaluation by providing a short explanation. Be as objective as possible. After providing your explanation, you must rate the answer on a scale of 1 to 3 as mentioned above by strictly following this format: \"[[rating]]\", for example: \"Rating: [[1]]\" + + Here is the question and the answer you need to evaluate: + [Question] + {{question}} + [Answer] + {{answer}} + max_new_tokens: 256 + + filter_question_answer_pair_inner: + function: + question: str + answer: str + spec: float + return: + - defs: + prompt_data: + call: filter_qa_template + spec: {introduction: str, principles: str, generation: str, max_new_tokens: int} + args: + question: "{{question}}" + answer: "{{answer}}" + teacher_input: + call: teacher_template + args: + sys_prompt: "{{teacher_sys_prompt}}" + prompt: |- + {{prompt_data.introduction}} + {{prompt_data.principles}} + {{prompt_data.generation}} + teacher_output: + model: "{{teacher_model}}" + input: "{{teacher_input}}" + params: + STOP_SEQUENCES: ["{{teacher_stop_token}}"] + INCLUDE_STOP_SEQUENCE: false + MAX_NEW_TOKENS: "{{prompt_data.max_new_tokens}}" + parser: + spec: { "rating": str } + regex: 'Rating.*\[\[(?P<rating>\d+\.?\d*)\]\]' + mode: search + - "{{ (teacher_output.rating if teacher_output.rating is not none else 0.0) | float}}" + + filter_question_answer_pair: + function: + qa_pairs: [{question: str, answer: str}] + spec: [{question: str, answer: str}] + return: + - defs: + ratings: + for: + qa_pair: "{{qa_pairs}}" + repeat: + - def: filter_output + call: filter_question_answer_pair_inner + spec: float + args: + question: "{{qa_pair.question}}" + answer: "{{qa_pair.answer}}" + - data: + qa_pair: "{{qa_pair}}" + rating: "{{filter_output}}"
+ filtered: + lan: python + spec: [{question: str, answer: str}] + code: | # keep only if rating is at least two + result = [p["qa_pair"] for p in {{ratings}} if p["rating"] >= 2] + - "{{filtered}}" + + +document: +- "----- Loading seed examples -----\n\n" +- def: seed_examples + read: ./qna.yaml + parser: yaml +- "\n\n----- Generating questions -----\n\n" +- def: generated_questions + call: gen_questions_freeform + spec: [{icl_question: str, icl_answer: str, question: str}] + args: + task_description: "{{seed_examples.task_description}}" + seed_examples: "{{seed_examples.seed_examples}}" +- "\n\n----- Filtering questions -----\n\n" +- def: filtered_questions + call: filter_questions + spec: [{icl_question: str, icl_answer: str, question: str}] + args: + task_description: "{{seed_examples.task_description}}" + questions: "{{generated_questions}}" +- "\n\n----- Generating answers -----\n\n" +- def: qa_pairs + call: gen_answers + args: + questions: "{{filtered_questions}}" +- "\n\n----- Filtering QA pairs -----\n\n" +- call: filter_question_answer_pair + args: + qa_pairs: "{{qa_pairs}}" +- "\n" \ No newline at end of file diff --git a/examples/talk/data.yaml b/examples/talk/data.yaml new file mode 100644 index 00000000..196e6f0c --- /dev/null +++ b/examples/talk/data.yaml @@ -0,0 +1,16 @@ +source_code: + | + @SuppressWarnings("unchecked") + public static Map<String, String> deserializeOffsetMap(String lastSourceOffset) throws IOException { + Map<String, String> offsetMap; + if (lastSourceOffset == null || lastSourceOffset.isEmpty()) { + offsetMap = new HashMap<>(); + } else { + offsetMap = JSON_MAPPER.readValue(lastSourceOffset, Map.class); + } + return offsetMap; + } +repo_info: + repo: streamsets/datacollector + path: stagesupport/src/main/java/com/.../OffsetUtil.java + function_name: OffsetUtil.deserializeOffsetMap \ No newline at end of file diff --git a/examples/talk/ground_truth.txt b/examples/talk/ground_truth.txt new file mode 100644 index 00000000..b448169d --- /dev/null +++
b/examples/talk/ground_truth.txt @@ -0,0 +1,3 @@ +The function `deserializeOffsetMap` takes a string as input and returns a map. It first checks if the input string is null or empty. If it is, it creates a new empty map and returns it. Otherwise, it uses the Jackson library to parse the input string into a map and returns it. + +The `@SuppressWarnings("unchecked")` annotation is used to suppress the warning that the type of the parsed map is not checked. This is because the Jackson library is used to parse the input string into a map, but the specific type of the map is not known at compile time. Therefore, the warning is suppressed to avoid potential issues. \ No newline at end of file diff --git a/examples/talk/qna.yaml b/examples/talk/qna.yaml new file mode 100644 index 00000000..50dcc1e8 --- /dev/null +++ b/examples/talk/qna.yaml @@ -0,0 +1,15 @@ +task_description: to teach a large language model to come up with puns +created_by: mizmo +seed_examples: +- question: Tell me a pun about birds. + answer: |- + Why do birds eat wood? + Because they're peckish! +- question: Tell me a pun about gas. + answer: |- + Why did the car have a belly ache? + Because it had too much gas! +- question: Tell me a pun about waves. + answer: |- + What did the ocean say to the ocean? + Nothing. It just waved! \ No newline at end of file