Add batching functionality to evaluator #440

sgrubeml · 2023-02-10T19:14:33Z

What does this PR do?

This PR adds batching functionality to the evaluator for query calls to the skill manager. Instead of sending a query call for each data point, data points are "batched" into a single request and the skill manager returns the predictions for all data points at once.

Who can review?

@HaritzPuerto
@timbmg

Test locally

To test this PR locally

change square-core/skill-manager/Dockerfile to:

# Skill-manager image for testing batching locally.
# The image is built from the folder that contains both square-core and
# square-skill-api, so every COPY source is relative to that folder.
FROM python:3.10.8-slim-buster AS base

ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# git is required to install packages from GitHub; the apt lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get -y update && apt-get -y install git && rm -rf /var/lib/apt/lists/*

RUN pip install --upgrade pip

WORKDIR /app

COPY square-core/skill-manager/requirements.txt requirements.txt
RUN pip install -r requirements.txt

# Install the locally cloned square-skill-api in editable mode.
COPY square-skill-api square-skill-api
RUN pip install --editable square-skill-api

# Copy only the Python package, so that `skill_manager.main` (see CMD) resolves
# to /app/skill_manager/main.py even without the docker-compose volume mount.
COPY square-core/skill-manager/skill_manager skill_manager

COPY square-core/skill-manager/logging.conf logging.conf

FROM base AS test
# apt-get is used instead of apt, whose CLI is not meant for scripted use.
RUN apt-get -y update && apt-get -y install docker.io && rm -rf /var/lib/apt/lists/*

# NOTE(review): assumes requirements.dev.txt, tests/ and pytest.ini live in
# square-core/skill-manager, like requirements.txt and logging.conf - confirm.
COPY square-core/skill-manager/requirements.dev.txt requirements.dev.txt
RUN pip install -r requirements.dev.txt

COPY square-core/skill-manager/tests tests
COPY square-core/skill-manager/pytest.ini pytest.ini

FROM base AS build
EXPOSE 8000

CMD ["uvicorn", "skill_manager.main:app", "--host", "0.0.0.0", "--port", "8000", "--log-config", "logging.conf"]

Change square-core/skills/Dockerfile to:

# Skill image for testing batching locally.
# The image is built from the folder that contains both square-core and
# square-skill-api, so every COPY source is relative to that folder.
FROM python:3.10.8-slim-buster AS build

# Name of the skill folder whose skill.py is packaged. Defaults to the
# extractive-qa skill; can be overridden with `--build-arg skill=<name>`.
ARG skill=extractive-qa

ENV PYTHONUNBUFFERED=1
ENV PIP_NO_CACHE_DIR=1

# git is required to install packages from GitHub; the apt lists are removed
# in the same layer so they do not end up in the image.
RUN apt-get -y update && apt-get -y install git && rm -rf /var/lib/apt/lists/*

WORKDIR /app

RUN pip install --upgrade pip
COPY square-core/skills/requirements.txt requirements.txt
RUN pip install -r requirements.txt

# Install the locally cloned square-skill-api in editable mode.
COPY square-skill-api square-skill-api
RUN pip install --editable square-skill-api

COPY square-core/skills/main.py main.py
COPY square-core/skills/utils.py utils.py

# Only the selected skill's implementation is copied into the image.
COPY square-core/skills/${skill}/skill.py skill.py
COPY square-core/skills/logging.conf logging.conf

EXPOSE 80

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80", "--log-config", "logging.conf"]

Change the URLs so that they point to the locally running skill-manager and extractive-qa skill.

Add the following docker-compose to the project root folder:

services:

  # Evaluator API, served by uvicorn with auto-reload on port 8081.
  evaluator:
    image: ukpsquare/evaluator:latest
    build:
      context: evaluator
      target: build
    command: >-
      uvicorn evaluator.app.main:app --reload --host 0.0.0.0 --port 8081
      --log-config logging.conf
    env_file:
      - ./evaluator/.local.env
    environment:
      API_PREFIX: ""
      WEB_CONCURRENCY: "1"
      KEYCLOAK_BASE_URL: ""
      MODEL_API_URL: ""
      MONGO_HOST: mongodb
      MONGO_PORT: "27017"
      VERIFY_ISSUER: "0"
      SQUARE_PRIVATE_KEY_FILE: /app/private_key.pem
      DATASET_DIR: /app/datasets/
      REDIS_HOST: redis
      REDIS_PORT: "6379"
    depends_on:
      - mongodb
      - redis
      - rabbit
    volumes:
      # source code is bind-mounted so that --reload picks up local changes
      - ./evaluator/evaluator:/app/evaluator
      - ./evaluator/datasets:/app/datasets/
      - ./evaluator/private_key.pem:/app/private_key.pem
    ports:
      - "8081:8081"
    expose:
      - "8081"

  # Celery worker running the tasks defined in evaluator.tasks; it uses the
  # same image and environment as the evaluator service.
  evaluation_worker:
    image: ukpsquare/evaluator:latest
    build:
      context: evaluator
      target: build
    command: celery -A evaluator.tasks worker --loglevel=info
    env_file:
      - ./evaluator/.local.env
    environment:
      API_PREFIX: ""
      WEB_CONCURRENCY: "1"
      KEYCLOAK_BASE_URL: ""
      MODEL_API_URL: ""
      MONGO_HOST: mongodb
      MONGO_PORT: "27017"
      VERIFY_ISSUER: "0"
      SQUARE_PRIVATE_KEY_FILE: /app/private_key.pem
      DATASET_DIR: /app/datasets/
      REDIS_HOST: redis
      REDIS_PORT: "6379"
    depends_on:
      - mongodb
      - redis
      - rabbit
    volumes:
      - ./evaluator/evaluator:/app/evaluator
      # single slash, identical to the datasets mount of the evaluator service
      - ./evaluator/datasets:/app/datasets/
      - ./evaluator/private_key.pem:/app/private_key.pem

  # MongoDB; the other services reach it through MONGO_HOST=mongodb.
  mongodb:
    image: mongo:5.0.4
    restart: always
    env_file:
      - ./evaluator/.local.env
    volumes:
      # named volume declared at the bottom of this file
      - mongo-data:/data/db
    ports:
      - "27017:27017"
    expose:
      - "27017"

  # Redis started with a password; ${REDIS_PASSWORD} is substituted by
  # Compose before the container starts.
  redis:
    image: redis:latest
    env_file:
      - ./evaluator/.local.env
    command:
      - redis-server
      - "--requirepass ${REDIS_PASSWORD}"
    ports:
      - "6379:6379"

  # RabbitMQ (image variant with the management plugin).
  rabbit:
    image: rabbitmq:3-management
    hostname: rabbit
    env_file:
      - ./evaluator/.local.env
    ports:
      - "5672:5672"
      - "15672:15672"

  # Skill manager, run from the locally built skill-manager-batching image.
  skill-manager:
    image: skill-manager-batching
    build:
      context: skill-manager
      target: build
    # uvicorn's --workers option is not valid together with --reload (it is
    # ignored with a warning), so only --reload is passed here.
    command: >-
      uvicorn skill_manager.main:app --reload --host 0.0.0.0 --port 8000
      --log-config logging.conf
    env_file:
      - ./skill-manager/.local.env
    environment:
      API_PREFIX: ""
      WEB_CONCURRENCY: "1"
      KEYCLOAK_BASE_URL: ""
      MODEL_API_URL: ""
      MONGO_HOST: mongodb
      MONGO_PORT: "27017"
      VERIFY_ISSUER: "0"
      REDIS_HOST: redis
      REDIS_PORT: "6379"
      SQUARE_PRIVATE_KEY_FILE: /app/private_key.pem
    depends_on:
      - mongodb
      - redis
    volumes:
      # source code is bind-mounted so that --reload picks up local changes
      - ./skill-manager/skill_manager:/app/skill_manager
      - ./evaluator/private_key.pem:/app/private_key.pem
    ports:
      - "8000:8000"
    expose:
      - "8000"

  # Extractive-QA skill, run from the locally built skills-batching image and
  # published on host port 8083 (container port 80).
  extractive-qa:
    image: skills-batching
    build:
      context: skills
      args:
        skill: extractive-qa
    environment:
      WEB_CONCURRENCY: "2"
      VERIFY_SSL: "1"
      SQUARE_API_URL: https://square.ukp-lab.de/api
      SQUARE_SKILL_MANAGER: http://skill-manager:8000/api
      KEYCLOAK_BASE_URL: ""
      REALM: square
      CLIENT_ID: ukp-extractive-qa
      SQUARE_PRIVATE_KEY_FILE: /app/private_key.pem
    volumes:
      - ./evaluator/private_key.pem:/app/private_key.pem
    ports:
      - "8083:80"


# Named volume backing /data/db of the mongodb service (default driver options).
volumes:
  mongo-data: {}

If the skills are not running locally (i.e. inference is not possible), test batching by adding a mocked response to the extractive-qa skill in square-core/skills/extractive-qa/skill.py:

For testing batching of skill-manager ("topk" argument needs to be accounted for):

def get_model_response_topk_2_for_skill_manager():
    """Return a canned model response for a batch of two queries with two answers each.

    Intended as a stand-in for the real model call when testing batching in
    the skill manager. The ``start_logits``/``end_logits`` strings are encoded
    payloads (``model_output_is_encoded`` is True); they appear to be
    base64-encoded NumPy ``.npy`` data - TODO confirm against the model API.

    Returns:
        dict: the mocked response; ``answers`` holds one list per query, each
        with two answer dicts (``score``, ``start``, ``end``, ``answer``).
    """
    start_logits = "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDIsIDcxKSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAqwDQtAALhAwTQQQMH97k/Bi1tFwcBfJ8ENzj7Bjfk0wTTG7cAbcDbBix0ewbyIY8EztTvBV7VFwESw7cA6Hc7A3mMAwUNTCcHxEArBrccbwf9iJ8HYLxrBBQ4uwcGyGMGR4hbB8aZBwQW7FsGxJwnB2XwqwVHeRMHFJDjBLA8bwf+WKcHHTRTB9r7+wGTjsMDx9DHBbFLiwHsnGUAFW6e/oBTrwK33HcGdwwPBmMIavfE8u8ATbnTAKpvBQGt1Ar/4arfAteOGwFBg/r9ZCQ3BP9AVwTkuEMHi1VnAC8oQwTOFC8GMrgdAlyqOwIoAgsDrBQrBuC76wFvB6MCDngbB70m/wLvd3sBdr+HAMzE9wYAoH8H6wAnBaEvmwBYm/j/dUErBG5NIwYWJT8F2YkvBX2s7wSJ0UMEmQUDBZWkRwZ/WQsEgXC/B6QlhwQveOsGlv6vASVYMwfSu+MDBBBTBRb8QwVW7GsE5FynByUgtwbukJMGLbzDBD/Yhwb60HME2+EDBg+kewQihFcHnUzDBJ/5GwVdRPcE4lSLB+RoxweynH8Fg6AzBMI/bwHGeNcGQ19nAxNrUP5ch0r+Np/rAU7Eawchr38CYKijAaiwGwSbLz8C0tnVACJn/vzFdmsDpBg3ADPeOP8x0AMFs4AnBdn8HwYXyhr96KvDAhheiwAbd30An3O+/XWFBwI+lAMF2a+nAmsHYwKzt88DY1qvAFHDLwJ0L5sDDyTXB0sgZwaJRBMEUrN3A"
    end_logits = "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDIsIDcxKSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAo2hjFAT5IdwTvLRcFB2SnBHFJNwVObWcEGoyPBWxRgwULQM8FCpDvBJly+wA7PPcEGhSzBmEUSwSa/BsGwYknABVsnwWSsMMEEyibBf8z6wNnxAMGA5SLBNHU8wd0kOMG+xRLBDm8ywTKhM8F3tyPBLqAZwcNx8MALURnBDlkEwcL+y8AWgTbBhYAuwXJdi8D8hdPAcBiuv23lhsDtoPjAVpvewKcpksCCAQvBM/DVv7Ckfr9YXFPAAKXGPXfLwkB7kq3AQ70awTeL9sDOg93AF7yBwDSy9sCC3EXAC/USwK+LicBabH/AUDJBQHvqgsBkaAnBpBQMv7mzssBcORLBDRbrwFFr2sAUKxHBDt3twBZXiMBb+xnAMPHsvxBKK0As3i7B/EVJwYC3M8FoME/BzcVcwfBeP8HCA2LBZGVGwZhJRcH4PPLAcDFHwfh2K8GLnCrBUyEZwXqalcDC9TPBQPUzwY5vMMHz6xPBYqsNwUGqLMHpij7BD5E7wbIcG8HkgDTBnTM3wZw6LMEOwyjBoc8FwZ7wKcFaog/BC0rzwAxnOsFrCDXB1bS/wNho5sDEghC/9FeUwN9w78A6u/PAuJypwOLsBsHWs1nAqpszwH7lqMDRtJO/3IJ3QGEnn8AEMg7BHB32wLxg0sCJOwe/hBzVwNNT3r8sj5e/DEx1wEnD4b+sfdFAdghdwEsWAMHIN5o+ZniqwNGeBsE/3ubAhC3MwHZuFcGaPdzArv9zwCEz+L8wN4K/"

    def _span(score, start, end, answer):
        # One answer candidate; key order matches the model's answer format.
        return {"score": score, "start": start, "end": end, "answer": answer}

    # Second-ranked candidate text, shared by both queries.
    long_span = "Denver Broncos defeated the National Football Conference (NFC) champion Carolina Panthers"

    # One inner list per query in the batch, best answer first.
    answers = [
        [
            _span(0.8567973375320435, 177, 191, "Denver Broncos"),
            _span(0.03982760012149811, 177, 266, long_span),
        ],
        [
            _span(0.8632825613021851, 249, 266, "Carolina Panthers"),
            _span(0.036755505949258804, 177, 266, long_span),
        ],
    ]

    return {
        "model_outputs": {"start_logits": start_logits, "end_logits": end_logits},
        "model_output_is_encoded": True,
        "answers": answers,
        "questions": [],
        "contexts": [],
        "attributions": [],
        "adversarial": {"indices": []},
    }

For testing batching of evaluator ("topk" argument always set to 1):

def get_model_response_topk_1_for_evaluator():
    """Return a canned model response for a batch of two queries with one answer each.

    Intended as a stand-in for the real model call when testing batching in
    the evaluator, which requests a single answer per query. The
    ``start_logits``/``end_logits`` strings are encoded payloads
    (``model_output_is_encoded`` is True); they appear to be base64-encoded
    NumPy ``.npy`` data - TODO confirm against the model API.

    Returns:
        dict: the mocked response; ``answers`` holds one single-element list
        per query, each element a dict with ``score``, ``start``, ``end`` and
        ``answer``.
    """
    encoded_outputs = {
        "start_logits": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDIsIDcxKSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAqwDQtAALhAwTQQQMH97k/Bi1tFwcBfJ8ENzj7Bjfk0wTTG7cAbcDbBix0ewbyIY8EztTvBV7VFwESw7cA6Hc7A3mMAwUNTCcHxEArBrccbwf9iJ8HYLxrBBQ4uwcGyGMGR4hbB8aZBwQW7FsGxJwnB2XwqwVHeRMHFJDjBLA8bwf+WKcHHTRTB9r7+wGTjsMDx9DHBbFLiwHsnGUAFW6e/oBTrwK33HcGdwwPBmMIavfE8u8ATbnTAKpvBQGt1Ar/4arfAteOGwFBg/r9ZCQ3BP9AVwTkuEMHi1VnAC8oQwTOFC8GMrgdAlyqOwIoAgsDrBQrBuC76wFvB6MCDngbB70m/wLvd3sBdr+HAMzE9wYAoH8H6wAnBaEvmwBYm/j/dUErBG5NIwYWJT8F2YkvBX2s7wSJ0UMEmQUDBZWkRwZ/WQsEgXC/B6QlhwQveOsGlv6vASVYMwfSu+MDBBBTBRb8QwVW7GsE5FynByUgtwbukJMGLbzDBD/Yhwb60HME2+EDBg+kewQihFcHnUzDBJ/5GwVdRPcE4lSLB+RoxweynH8Fg6AzBMI/bwHGeNcGQ19nAxNrUP5ch0r+Np/rAU7Eawchr38CYKijAaiwGwSbLz8C0tnVACJn/vzFdmsDpBg3ADPeOP8x0AMFs4AnBdn8HwYXyhr96KvDAhheiwAbd30An3O+/XWFBwI+lAMF2a+nAmsHYwKzt88DY1qvAFHDLwJ0L5sDDyTXB0sgZwaJRBMEUrN3A",
        "end_logits": "k05VTVBZAQB2AHsnZGVzY3InOiAnPGY0JywgJ2ZvcnRyYW5fb3JkZXInOiBGYWxzZSwgJ3NoYXBlJzogKDIsIDcxKSwgfSAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIAo2hjFAT5IdwTvLRcFB2SnBHFJNwVObWcEGoyPBWxRgwULQM8FCpDvBJly+wA7PPcEGhSzBmEUSwSa/BsGwYknABVsnwWSsMMEEyibBf8z6wNnxAMGA5SLBNHU8wd0kOMG+xRLBDm8ywTKhM8F3tyPBLqAZwcNx8MALURnBDlkEwcL+y8AWgTbBhYAuwXJdi8D8hdPAcBiuv23lhsDtoPjAVpvewKcpksCCAQvBM/DVv7Ckfr9YXFPAAKXGPXfLwkB7kq3AQ70awTeL9sDOg93AF7yBwDSy9sCC3EXAC/USwK+LicBabH/AUDJBQHvqgsBkaAnBpBQMv7mzssBcORLBDRbrwFFr2sAUKxHBDt3twBZXiMBb+xnAMPHsvxBKK0As3i7B/EVJwYC3M8FoME/BzcVcwfBeP8HCA2LBZGVGwZhJRcH4PPLAcDFHwfh2K8GLnCrBUyEZwXqalcDC9TPBQPUzwY5vMMHz6xPBYqsNwUGqLMHpij7BD5E7wbIcG8HkgDTBnTM3wZw6LMEOwyjBoc8FwZ7wKcFaog/BC0rzwAxnOsFrCDXB1bS/wNho5sDEghC/9FeUwN9w78A6u/PAuJypwOLsBsHWs1nAqpszwH7lqMDRtJO/3IJ3QGEnn8AEMg7BHB32wLxg0sCJOwe/hBzVwNNT3r8sj5e/DEx1wEnD4b+sfdFAdghdwEsWAMHIN5o+ZniqwNGeBsE/3ubAhC3MwHZuFcGaPdzArv9zwCEz+L8wN4K/",
    }

    # (score, start, end, answer) of the single best answer for each query.
    top_answers = [
        (0.8567973375320435, 177, 191, "Denver Broncos"),
        (0.8632825613021851, 249, 266, "Carolina Panthers"),
    ]

    response = {
        "model_outputs": encoded_outputs,
        "model_output_is_encoded": True,
        # one single-element list per query in the batch
        "answers": [
            [{"score": score, "start": start, "end": end, "answer": text}]
            for score, start, end, text in top_answers
        ],
    }
    # The remaining fields are left empty in the mock; each gets its own list.
    for empty_field in ("questions", "contexts", "attributions"):
        response[empty_field] = []
    response["adversarial"] = {"indices": []}
    return response

Replace the call to the model (i.e. the assignment of model_response) with a call to the function that mocks the model response, in square-core/skills/extractive-qa/skill.py:
model_response = get_model_response_topk_1_for_evaluator()

The mocked response is for a query with batch size 2, so make sure to send only 2 questions in the query; for example, add queries = queries[:2] at https://github.com/nclskfm/square-core/blob/master/skill-manager/skill_manager/routers/skill.py#L303

Clone the square-skill-api repo to the parent folder of the square-core project. Folder structure should look like this:

root folder
-- square-core
-- square-skill-api

change the query attribute of the QueryRequest class in https://github.com/UKP-SQuARE/square-skill-api/blob/master/square_skill_api/models/request.py#L97 to this:

   query: Union[str, List[str]] = Field(
        ..., description="The input to the model that is entered by the user"
    )

Make sure to import Union and List.

Go to root folder and run the following command from your terminal:

docker build -t skill-manager-batching -f square-core/skill-manager/Dockerfile . && docker build -t skills-batching -f square-core/skills/Dockerfile .

go to the square-core folder and run the following command:

docker compose --env-file evaluator/.local.env up -d

I hope I didn't forget anything :)

… manager

Batching queries skill manager

Develop

…r-compose file

Add BATCH_SIZE variable to docker-compose

sgrubeml and others added 16 commits February 1, 2023 19:37

add first version of bacthing, model data mocked.

c8a88e1

add batching of data points to evaluator service for queries to skill…

e7ea963

… manager

merge with develop branch

d1c8582

clean up repo

3554741

clean up repo

e789678

clean up repo

2f65c3f

add change requests for PR

7a629a6

add small change request to PR

0ea0062

rename qa_type variable to context_type

53d2c71

batch dataset into query instead of passsing whole dataset at once

ae51fed

fix small bug

eceaef6

Merge pull request #73 from nclskfm/batching-queries-skill-manager

3b5308e

Batching queries skill manager

Merge branch 'master' into develop

93d4cc9

Merge pull request #75 from nclskfm/develop

aa69d44

Develop

add BATCH_SIZE variable previously defined in .local.env to the docke…

ebe347f

…r-compose file

Merge pull request #76 from nclskfm/develop

4aca431

Add BATCH_SIZE variable to docker-compose

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add batching functionality to evaluator #440

Add batching functionality to evaluator #440

sgrubeml commented Feb 10, 2023

Add batching functionality to evaluator #440

Are you sure you want to change the base?

Add batching functionality to evaluator #440

Conversation

sgrubeml commented Feb 10, 2023

What does this PR do?

Who can review?

Test locally