-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Fix competency extraction feature (#145)
- Loading branch information
1 parent
2e4f640
commit 5765c9c
Showing
14 changed files
with
292 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
22 changes: 22 additions & 0 deletions
22
app/domain/competency_extraction_pipeline_execution_dto.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
from typing import List | ||
|
||
from pydantic import Field, BaseModel | ||
|
||
from . import PipelineExecutionDTO | ||
from .data.competency_dto import CompetencyTaxonomy, Competency | ||
|
||
|
||
class CompetencyExtractionPipelineExecutionDTO(BaseModel):
    # Request payload for one run of the competency extraction pipeline.
    # Every field except `execution` is read from a camelCase alias.
    # Documented with comments rather than a docstring so the generated
    # model schema is left untouched.

    # Generic pipeline execution data (see PipelineExecutionDTO).
    execution: PipelineExecutionDTO

    # Required: no default is given.
    course_description: str = Field(alias="courseDescription")

    # Both lists fall back to empty when the client omits them.
    current_competencies: List[Competency] = Field(
        default=[], alias="currentCompetencies"
    )
    taxonomy_options: List[CompetencyTaxonomy] = Field(
        default=[], alias="taxonomyOptions"
    )

    max_n: int = Field(
        default=10,
        alias="maxN",
        description="Maximum number of competencies to extract from the course description",
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,12 @@ | ||
from typing import List, Optional | ||
from typing import List | ||
|
||
from pydantic import Field | ||
|
||
from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO | ||
from app.domain import PipelineExecutionDTO | ||
from app.domain.data.lecture_unit_dto import LectureUnitDTO | ||
from app.domain.status.stage_dto import StageDTO | ||
|
||
|
||
class IngestionPipelineExecutionDto(PipelineExecutionDTO):
    # Execution payload carrying the lecture units handled by the ingestion
    # pipeline.
    # NOTE(review): this view appears to interleave two revisions of the file;
    # `settings` and `initial_stages` may already be inherited from
    # PipelineExecutionDTO -- confirm before keeping them here.

    # Required; read from the "pyrisLectureUnitWebhookDTOS" key.
    lecture_units: List[LectureUnitDTO] = Field(
        default=..., alias="pyrisLectureUnitWebhookDTOS"
    )
    settings: Optional[PipelineExecutionSettingsDTO]
    initial_stages: Optional[List[StageDTO]] = Field(
        alias="initialStages", default=None
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,16 @@ | ||
from pydantic import BaseModel | ||
from typing import Optional | ||
|
||
from pydantic import BaseModel, Field | ||
|
||
from app.domain.pipeline_execution_settings_dto import PipelineExecutionSettingsDTO | ||
from app.domain.status.stage_dto import StageDTO | ||
|
||
|
||
class PipelineExecutionDTO(BaseModel):
    # Base payload shared by pipeline execution requests: optional execution
    # settings plus an optional list of initial stages.

    settings: Optional[PipelineExecutionSettingsDTO]
    # Read from the "initialStages" key; None when absent.
    initial_stages: Optional[list[StageDTO]] = Field(
        alias="initialStages", default=None
    )

    class Config:
        # Accept the Python attribute name as well as the alias on input.
        populate_by_name = True
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
from app.domain.data.competency_dto import Competency | ||
from app.domain.status.status_update_dto import StatusUpdateDTO | ||
|
||
|
||
class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO):
    # Status update that additionally carries a list of competencies in
    # `result`; the list is empty unless a value is supplied.
    result: list[Competency] = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
from ..pipeline.pipeline import Pipeline | ||
from app.pipeline.pipeline import Pipeline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
import logging | ||
from typing import Optional | ||
|
||
from langchain.output_parsers import PydanticOutputParser | ||
from langchain_core.prompts import ( | ||
ChatPromptTemplate, | ||
) | ||
|
||
from app.domain import ( | ||
CompetencyExtractionPipelineExecutionDTO, | ||
PyrisMessage, | ||
IrisMessageRole, | ||
) | ||
from app.domain.data.text_message_content_dto import TextMessageContentDTO | ||
from app.domain.data.competency_dto import Competency | ||
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments | ||
from app.pipeline import Pipeline | ||
from app.web.status.status_update import CompetencyExtractionCallback | ||
from app.pipeline.prompts.competency_extraction import system_prompt | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class CompetencyExtractionPipeline(Pipeline):
    """Extract competencies from a course description with a single LLM call.

    The pipeline fills ``system_prompt`` with the request data, sends it as one
    system message, splits the reply on blank lines and parses the JSON object
    found in each chunk into a ``Competency``.
    """

    callback: Optional[CompetencyExtractionCallback]
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser

    def __init__(self, callback: Optional[CompetencyExtractionCallback] = None):
        """Create the pipeline.

        Args:
            callback: Receiver of the final result via ``done(final_result=...)``.
                May be ``None``, in which case the result is only returned.
        """
        super().__init__(
            implementation_id="competency_extraction_pipeline_reference_impl"
        )
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(requirements=RequirementList())
        self.output_parser = PydanticOutputParser(pydantic_object=Competency)

    def __call__(
        self,
        dto: CompetencyExtractionPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """Run the extraction and report the parsed competencies.

        Args:
            dto: Request data (course description, taxonomy options, existing
                competencies and ``max_n``).
            prompt: Accepted for signature compatibility; not used, the
                module-level ``system_prompt`` is always applied.
            **kwargs: Ignored.

        Returns:
            The list of successfully parsed competencies (at most ``dto.max_n``).

        Raises:
            ValueError: If the course description or taxonomy options are
                empty, or ``max_n`` is zero or negative.
        """
        if not dto.course_description:
            raise ValueError("Course description is required")
        if not dto.taxonomy_options:
            raise ValueError("Taxonomy options are required")
        if not dto.max_n:
            raise ValueError("Non-zero max_n is required")
        if dto.max_n < 0:
            # A negative limit would otherwise be silently accepted.
            raise ValueError("max_n must be positive")

        taxonomy_options = ", ".join(dto.taxonomy_options)
        current_competencies = "\n\n".join(
            c.model_dump_json(indent=4) for c in dto.current_competencies
        )
        if current_competencies:
            current_competencies = (
                f"\nHere are the current competencies in the course:\n{current_competencies}\n"
                f"Do not repeat these competencies.\n"
            )

        # Separate local names so the (unused) `prompt` parameter is not shadowed.
        prompt_text = system_prompt.format(
            taxonomy_list=taxonomy_options,
            course_description=dto.course_description,
            max_n=dto.max_n,
            current_competencies=current_competencies,
        )
        system_message = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt_text)],
        )

        response = self.request_handler.chat(
            [system_message], CompletionArguments(temperature=0.4)
        )
        response_text = response.contents[0].text_content

        generated_competencies: list[Competency] = []

        # Collect competencies until max_n have been parsed. The limit is applied
        # to parsed results, not to raw chunks, so preamble text or malformed
        # chunks do not use up result slots.
        for i, chunk in enumerate(response_text.split("\n\n")):
            if len(generated_competencies) >= dto.max_n:
                break
            logger.debug("Processing competency %d: %s", i + 1, chunk)
            if "{" not in chunk or "}" not in chunk:
                logger.debug("Skipping competency without JSON")
                continue
            # Cut out the JSON object from any surrounding text.
            start = chunk.index("{")
            end = chunk.index("}") + 1
            try:
                competency = self.output_parser.parse(chunk[start:end])
            except Exception as e:
                # Best effort: one malformed chunk must not discard the rest.
                logger.debug("Error parsing competency: %s", e)
                continue
            logger.debug("Generated competency: %s", competency)
            generated_competencies.append(competency)

        if self.callback is not None:
            self.callback.done(final_result=generated_competencies)
        else:
            logger.warning(
                "No callback configured; competency extraction result is only returned"
            )
        return generated_competencies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Prompt template for competency extraction, meant for str.format().
# Placeholders: taxonomy_list, course_description, current_competencies, max_n.
# Braces of the JSON example are doubled so format() emits them literally, and
# the newlines inside the example's "description" value are written as "\\n"
# so the rendered example stays valid JSON.
system_prompt = """
You are an expert in all topics of computer science and its practical applications.
Your task consists of two parts:
1. Read the provided curriculum description of a university course.
2. Extract all learning goals ("competencies") from the course description.
Each competency must contain the following fields:
- title:
The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.
- description:
A detailed description of the competency in 2 to 5 bullet points.
Each bullet point illustrates a specific skill or concept of the competency.
Each bullet point is a complete sentence containing at most 15 words.
Each bullet point is on a new line and starts with "- ".
- taxonomy:
The classification of the competency within Bloom's taxonomy.
You must choose from these options in Bloom's taxonomy: {taxonomy_list}
All competencies must meet the following requirements:
- is mentioned in the course description.
- corresponds to exactly one subject or skill covered in the course description.
- is assigned to exactly one level of Bloom's taxonomy.
- is small and fine-grained. Large topics should be broken down into smaller competencies.
- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.
Here is the provided course description: {course_description}
Here is a template competency in JSON format:
{{
"title": "Competency Title",
"description": "- You understand this.\\n- You are proficient in doing that.\\n- You know how to do this.",
"taxonomy": "ANALYZE"
}}
{current_competencies}
Respond with 0 to {max_n} competencies extracted from the course description,
each in JSON format, split by two newlines.
"""
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.