diff --git a/explainability-api/microservice/checklist.py b/explainability-api/microservice/checklist.py
new file mode 100644
index 000000000..1a9859c18
--- /dev/null
+++ b/explainability-api/microservice/checklist.py
@@ -0,0 +1,179 @@
+# Process checklist JSON test cases so the results can be injected into the db
+import json
+import logging
+import itertools
+
+import requests
+
+from typing import List
+
+
+def create_query(skill, test_cases: List):
+    """
+    Builds the prediction requests for a skill from the checklist test cases.
+
+    Args:
+        skill: the skill for which the checklist tests are run
+        test_cases (list): the test cases as a list
+
+    Returns:
+        model_inputs (dict): the prediction requests together with the expected
+            answers, test types, capabilities and test names for each test case
+    """
+    skill_type = skill["skill_type"]
+    base_model = skill["default_skill_args"].get("base_model")
+    adapter = skill["default_skill_args"].get("adapter")
+    # extract all test cases
+    all_tests = [tests["test_cases"] for tests in test_cases]
+    # all_tests = list(itertools.chain.from_iterable([tests["test_cases"] for tests in test_cases]))
+    questions, contexts, answers = list(), list(), list()
+
+    test_type = list(itertools.chain.from_iterable([[test["test_type"]] * len(test["test_cases"])
+                                                    for test in test_cases]))
+    capability = list(itertools.chain.from_iterable([[test["capability"]] * len(test["test_cases"])
+                                                     for test in test_cases]))
+    test_name = list(itertools.chain.from_iterable([[test["test_name"]] * len(test["test_cases"])
+                                                    for test in test_cases]))
+
+    for tests in all_tests:
+        questions.append([query["question"] for query in tests])
+        # list of lists; for multiple-choice skills the options are appended to the context
+        contexts.append([query["context"] if skill_type != "multiple-choice"
+                         else query["context"] + "\n" + "\n".join(query["options"])
+                         for query in tests])
+        answers.extend([query.get("answer") if "answer" in query.keys() else query.get("prediction_before_change")
+                        for query in tests])
+
+    # TODO: send the requests as a batch to the skill query endpoint
+
+    prediction_requests = list()
+    # create one prediction request per test case
+    for idx in range(len(questions)):
+        for question, context in zip(questions[idx], contexts[idx]):
+            request = dict()
+            request["num_results"] = 1
+            request["user_id"] = "ukp"
+            request["skill_args"] = {"base_model": base_model, "adapter": adapter, "context": context}
+            request["query"] = question
+            prediction_requests.append(request)
+
+    model_inputs = dict()
+    model_inputs["request"] = prediction_requests
+    model_inputs["answers"] = answers
+    model_inputs["test_type"] = test_type
+    model_inputs["capability"] = capability
+    model_inputs["test_name"] = test_name
+    # logging.info("inputs: %s", model_inputs)
+
+    return model_inputs
+
+
+def predict(model_inputs: dict, skill_id: str) -> list:
+    """
+    Queries the skill for every prediction request and collects the results.
+
+    Args:
+        model_inputs (dict): input for the model inference, as built by create_query
+        skill_id (str): id of the skill for which the predictions are run
+
+    Returns:
+        A list with one entry per test case containing the question, context,
+        expected answer, prediction and whether the test was successful.
+    """
+    model_outputs = list()
+    try:
+        headers = {'Content-type': 'application/json'}
+        # NOTE: the SQuARE skill-manager URL is hard-coded here
+        skill_query_url = f"https://square.ukp-lab.de/api/skill-manager/skill/{skill_id}/query"
+        model_predictions = list()
+        # i = 0
+        for request in model_inputs["request"]:
+            response = requests.post(skill_query_url, data=json.dumps(request), headers=headers)
+            predictions = response.json()
+            model_predictions.append(predictions["predictions"][0]["prediction_output"]["output"])
+            # i += 1
+            # if i == 10:
+            #     break
+
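+        # NOTE: each test case is posted as its own HTTP request; if the
+        # skill-manager exposes a batch query endpoint, sending these requests
+        # in batches would cut down on round trips (see the TODO in create_query).
+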
+        # calculate per-test success: a test succeeds when the prediction matches the expected answer
+        success_rate = [pred == gold for pred, gold in zip(model_predictions, model_inputs["answers"])]
+        # overall_success_rate = sum(success_rate) / len(success_rate)
+
+        for test_type, capability, test_name, request, answer, prediction, success in zip(
+                model_inputs["test_type"],
+                model_inputs["capability"],
+                model_inputs["test_name"],
+                model_inputs["request"],
+                model_inputs["answers"],
+                model_predictions,
+                success_rate
+        ):
+            model_outputs.append(
+                {
+                    "skill_id": skill_id,
+                    "test_type": test_type,
+                    "capability": capability,
+                    "test_name": test_name,
+                    "question": request["query"],
+                    "context": request["skill_args"]["context"],
+                    "answer": answer,
+                    "prediction": prediction,
+                    "success": success
+                }
+            )
+        # print(model_outputs)
+    except Exception as ex:
+        logging.exception(ex)
+    return model_outputs
+
+
+def test_name_analysis(model_outputs):
+    """Aggregates successes and failures per test name, grouped by test type."""
+    types_of_tests = {}
+    for element in list(set([result["test_type"] for result in model_outputs])):
+        types_of_tests[element] = dict()
+    for test in types_of_tests.keys():
+        test_names = list(set([result["test_name"] for result in model_outputs if result["test_type"] == test]))
+        for name in test_names:
+            successful = 0
+            failure = 0
+            for result in model_outputs:
+                if result["test_name"] == name:
+                    if result["success"]:
+                        successful += 1
+                    else:
+                        failure += 1
+            types_of_tests[test][name] = dict({"successful": successful, "failure": failure})
+    return [types_of_tests]
+
+
+def capability_analysis(model_outputs):
+    """Aggregates successes and failures per capability, grouped by test type."""
+    types_of_tests = {}
+    for element in list(set([result["test_type"] for result in model_outputs])):
+        types_of_tests[element] = dict()
+    for test in types_of_tests.keys():
+        test_capabilities = list(set([result["capability"] for result in model_outputs if result["test_type"] == test]))
+        for cap in test_capabilities:
+            successful = 0
+            failure = 0
+            for result in model_outputs:
+                if result["capability"] == cap:
+                    if result["success"]:
+                        successful += 1
+                    else:
+                        failure += 1
+            types_of_tests[test][cap] = dict({"successful": successful, "failure": failure})
+    return [types_of_tests]
+
+
+def test_type_analysis(model_outputs):
+    """Aggregates successes and failures per test type."""
+    types_of_tests = {}
+    for element in list(set([result["test_type"] for result in model_outputs])):
+        successful = 0
+        failure = 0
+        for result in model_outputs:
+            if result['test_type'] == element:
+                if result['success']:
+                    successful += 1
+                else:
+                    failure += 1
+        types_of_tests[element] = dict({"successful": successful, "failure": failure})
+    return [types_of_tests]
diff --git a/explainability-api/microservice/checklist_api.py b/explainability-api/microservice/checklist_api.py
new file mode 100644
index 000000000..2180377db
--- /dev/null
+++ b/explainability-api/microservice/checklist_api.py
@@ -0,0 +1,78 @@
+import json
+import logging
+
+import requests
+from fastapi import APIRouter
+
+import checklist
+
+router = APIRouter()
+
+
+@router.get('/')
+def read_root():
+    return {"Hello": "World"}
+
+
+@router.get('/checklist/{skill_id}', name="run checklist")
+def run_checklist(skill_id: str, checklist_name: str, n_tests: int = None, test_analysis: str = None) -> list:
+    """
+    Runs a checklist against a skill and returns either the raw per-test results
+    or an aggregated analysis.
+
+    :param skill_id: skill id
+    :param checklist_name: name of the checklist to run
+    :param n_tests: number of test cases to run, counted from the first one; all tests are run if omitted
+    :param test_analysis: how to aggregate the CheckList results ('test_type', 'capability' or 'test_name');
+        the raw results are returned if omitted
+    :return: the raw results or the aggregated analysis
+    """
+    # tested with skill_id 63cdbd06a8b0d566ef20cb54, although its performance is poor
+    assert checklist_name is not None
+
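+    # NOTE: the checklist files are resolved relative to the current working directory,
+    # so the service is assumed to be started from the microservice/ folder (e.g. via main.py)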
+    checklist_path_dict = {
+        'extractive': '../checklists/extractive_model_tests.json',
+        'boolean': '../checklists/boolean_model_tests.json',
+        'abstractive': '../checklists/abstractive_models_tests.json',
+        'multiple_choice': '../checklists/multiple_choice_model_tests.json',
+        'open_domain': '../checklists/open_domain_models_tests.json',
+        'open_domain_bioasq': '../checklists/open_domain_models_bioasq_tests.json'
+    }
+
+    checklist_path = checklist_path_dict[checklist_name]
+    with open(checklist_path) as f:
+        checklist_tests = json.load(f)
+
+    try:
+        skill_response = requests.get(f'https://square.ukp-lab.de/api/skill-manager/skill/{skill_id}')
+        skill = skill_response.json()
+        skill_id = skill["id"]
+        # skill_type = skill["skill_type"]
+
+        test_cases = checklist_tests['tests']
+        model_inputs = checklist.create_query(skill, test_cases)
+
+        if n_tests is not None:
+            # running the full checklist can take a long time, so optionally limit the number of tests
+            model_inputs['request'] = model_inputs["request"][:n_tests]
+        model_outputs = checklist.predict(model_inputs, skill_id)
+
+        if test_analysis is None:
+            output_return = model_outputs
+        # aggregated analysis
+        elif test_analysis == 'test_type':
+            output_return = checklist.test_type_analysis(model_outputs)
+        elif test_analysis == 'capability':
+            output_return = checklist.capability_analysis(model_outputs)
+        elif test_analysis == 'test_name':
+            output_return = checklist.test_name_analysis(model_outputs)
+        else:
+            raise ValueError(f"Unknown test_analysis option: {test_analysis}")
+
+        # save the output as json for later inspection
+        with open('temp_result/temp_result.json', 'w') as f:
+            json.dump(output_return, f, indent=4)
+
+        return output_return
+
+    except Exception as e:
+        logging.exception(e)
diff --git a/explainability-api/microservice/main.py b/explainability-api/microservice/main.py
new file mode 100644
index 000000000..fd67285c4
--- /dev/null
+++ b/explainability-api/microservice/main.py
@@ -0,0 +1,11 @@
+from fastapi import FastAPI
+
+import checklist_api
+
+app = FastAPI()
+
+app.include_router(checklist_api.router)
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run("main:app", reload=True, timeout_keep_alive=200)  # for dev purposes
diff --git a/explainability-api/microservice/temp_result/temp_result.json b/explainability-api/microservice/temp_result/temp_result.json
new file mode 100644
index 000000000..eb28ab165
--- /dev/null
+++ b/explainability-api/microservice/temp_result/temp_result.json
@@ -0,0 +1,44 @@
+[
+    {
+        "MFT": {
+            "Temporal": {
+                "successful": 0,
+                "failure": 30
+            },
+            "Coref": {
+                "successful": 0,
+                "failure": 80
+            },
+            "Negation": {
+                "successful": 0,
+                "failure": 40
+            },
+            "Fairness": {
+                "successful": 0,
+                "failure": 10
+            },
+            "SRL": {
+                "successful": 0,
+                "failure": 100
+            },
+            "Taxonomy": {
+                "successful": 0,
+                "failure": 242
+            },
+            "Vocabulary": {
+                "successful": 0,
+                "failure": 70
+            }
+        },
+        "INV": {
+            "Robustness": {
+                "successful": 0,
+                "failure": 20
+            },
+            "NER": {
+                "successful": 0,
+                "failure": 100
+            }
+        }
+    }
+]
\ No newline at end of file
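For a quick local smoke test of the new endpoint, something like the sketch below could be used. It assumes the service was started via main.py on uvicorn's default port 8000; the skill id is the one mentioned in the run_checklist comment and is used here purely as an illustration.

    import requests

    # run the first 10 extractive checklist tests against the skill and aggregate the results by test type
    response = requests.get(
        "http://localhost:8000/checklist/63cdbd06a8b0d566ef20cb54",
        params={"checklist_name": "extractive", "n_tests": 10, "test_analysis": "test_type"},
        timeout=600,  # each test case triggers a separate skill query, so this can take a while
    )
    print(response.json())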