Skip to content

Commit

Permalink
Dev/synthetic gen refactor (#50)
Browse files (browse the repository at this point in the history)
* add unsaved changes, delete unnecessary files
  • Loading branch information
ngc436 authored Jan 28, 2025
1 parent 9f7e0db commit 75fcae3
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 13 deletions.
Binary file removed examples/.DS_Store
Binary file not shown.
4 changes: 2 additions & 2 deletions protollm-synthetic/examples/aspect_summarisation_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@

aspect = "politics"

qwen_large_api_key = os.environ.get("QWEN2VL_OPENAI_API_KEY")
qwen_large_api_base = os.environ.get("QWEN2VL_OPENAI_API_BASE")
qwen_large_api_key = os.environ.get("OPENAI_API_KEY")
qwen_large_api_base = os.environ.get("OPENAI_API_BASE")

llm=VLLMChatOpenAI(
api_key=qwen_large_api_key,
Expand Down
4 changes: 2 additions & 2 deletions protollm-synthetic/examples/quiz_example.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import os
from samplefactory.synthetic_pipelines.chains import QuizChain
from samplefactory.utils import Dataset, VLLMChatOpenAI
from protollm_synthetic.synthetic_pipelines.chains import QuizChain
from protollm_synthetic.utils import Dataset, VLLMChatOpenAI
import json
import asyncio

Expand Down
11 changes: 9 additions & 2 deletions protollm-synthetic/examples/rag_example.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
import os
import os
import json
import logging
from protollm_synthetic.synthetic_pipelines.chains import RAGChain
from protollm_synthetic.utils import Dataset, VLLMChatOpenAI
import asyncio

import logging
from protollm_synthetic.synthetic_pipelines.chains import RAGChain
from protollm_synthetic.utils import Dataset, VLLMChatOpenAI
import asyncio


# Сохраняем набор данных
texts = [
Expand Down Expand Up @@ -98,8 +104,8 @@
path = 'tmp_data/sample_data_rag_spb.json'
dataset = Dataset(data_col='content', path=path)

qwen_large_api_key = os.environ.get("QWEN_OPENAI_API_KEY")
qwen_large_api_base = os.environ.get("QWEN_OPENAI_API_BASE")
qwen_large_api_key = os.environ.get("OPENAI_API_KEY")
qwen_large_api_base = os.environ.get("OPENAI_API_BASE")

logger.info("Initializing LLM connection")

Expand Down Expand Up @@ -130,4 +136,5 @@
logger.info(f"Writing result to {path}")
df.to_json(path, orient="records")

logger.info("Generation successfully finished")
logger.info("Generation successfully finished")
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
import copy
from datetime import datetime
import logging
from samplefactory.synthetic_pipelines.prompts import (generate_summary_system_prompt, generate_summary_evaluation_system_prompt,
from protollm_synthetic.synthetic_pipelines.prompts import (generate_summary_system_prompt, generate_summary_evaluation_system_prompt,
generate_rag_system_prompt, check_summary_quality_human_prompt,
generate_rag_human_prompt, generate_aspect_summarisation_prompt,
generate_summary_human_prompt, generate_aspect_summarisation_evaluation_system_prompt,
generate_quiz_system_prompt, generate_quiz_human_prompt,
generate_instruction_one_shot_system_prompt, generate_instruction_one_shot_human_prompt,
merge_instructions, merge_instructions_human_prompt)
from samplefactory.utils import Dataset
from protollm_synthetic.utils import Dataset
import numpy as np
import asyncio
from typing import List, Optional, Dict, Any, TypeVar, cast
Expand All @@ -26,10 +26,10 @@
RunnableParallel, RunnableLambda)
from langchain.chains.combine_documents import create_stuff_documents_chain
from openai import APIConnectionError
from samplefactory.synthetic_pipelines.genetic_evolver import GeneticEvolver
from protollm_synthetic.synthetic_pipelines.genetic_evolver import GeneticEvolver

import random
from samplefactory.synthetic_pipelines.schemes import (SummaryQualitySchema,
from protollm_synthetic.synthetic_pipelines.schemes import (SummaryQualitySchema,
RAGScheme, AspectSummarisationQualitySchema,
QuizScheme, FreeQueryScheme, FreeQueryMerger)

Expand Down
2 changes: 1 addition & 1 deletion protollm-synthetic/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tool.poetry]
name = "samplefactory"
name = "protollm-synthetic"
version = "0.1.0"
description = "Sample generation with LLMs"
authors = ["Your Name <you@example.com>"]
Expand Down
4 changes: 2 additions & 2 deletions protollm-synthetic/tests/test_summarization_chain.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
import os
from samplefactory.synthetic_pipelines.chains import SummarisationChain
from samplefactory.utils import VLLMChatOpenAI, Dataset
from protollm_synthetic.synthetic_pipelines.chains import SummarisationChain
from protollm_synthetic.utils import VLLMChatOpenAI, Dataset
import pandas as pd
import asyncio

Expand Down

0 comments on commit 75fcae3

Please sign in to comment.