Skip to content

Commit

Permalink
refactor: 변경된 린트 반영
Browse files: browse the repository at this point in the history
  • Loading branch information
hun-ca committed Dec 17, 2024
1 parent a5f6855 commit b35558f
Show file tree
Hide file tree
Showing 12 changed files with 232 additions and 199 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@ import org.springframework.web.bind.annotation.RequestBody
/**
 * Feign client declared under the name `openAiClient`.
 *
 * The target URL is read from the `openai.api.url` property and the client is configured with
 * [OpenAiFeignConfiguration].
 */
// NOTE(review): diff view — the old and new forms of the changed lines both appear below.
@FeignClient(
name = "openAiClient",
url = "\${openai.api.url}",
configuration = [OpenAiFeignConfiguration::class]
configuration = [OpenAiFeignConfiguration::class],
)
interface OpenAiClient {
/**
 * Issues a POST (no sub-path is given on the mapping) with [request] as the request body.
 *
 * @param request payload sent as the body of the call
 * @return the response mapped to [OpenAiResponse]
 */
@PostMapping
fun send(@RequestBody request: OpenAiRequest): OpenAiResponse
fun send(
@RequestBody request: OpenAiRequest,
): OpenAiResponse
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@ import org.springframework.context.annotation.Configuration

/**
 * Spring configuration class that exposes a [Gson] bean through [fewGson].
 */
// NOTE(review): diff view — the old and new forms of the changed lines both appear below.
@Configuration
class GsonConfig {

/**
 * Builds the [Gson] instance with lenient parsing, HTML escaping disabled and pretty printing enabled.
 *
 * @return the configured [Gson] instance
 */
@Bean
fun fewGson(): Gson {
return GsonBuilder()
fun fewGson(): Gson =
GsonBuilder()
.setLenient()
.disableHtmlEscaping()
.setPrettyPrinting()
.create()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ class OpenAiFeignConfiguration(
@Value("\${openai.api.key}") private val apiKey: String,
) {
/**
 * Creates the Feign [RequestInterceptor] for this configuration.
 *
 * For every request template it receives, the interceptor sets an `Authorization` header of the form
 * `Bearer <apiKey>` (using the injected `apiKey`) and a `Content-Type` header of `application/json`.
 *
 * @return the interceptor that adds both headers
 */
// NOTE(review): diff view — the old and new forms of the changed lines both appear below.
@Bean
fun requestInterceptor(): RequestInterceptor {
return RequestInterceptor { template ->
fun requestInterceptor(): RequestInterceptor =
RequestInterceptor { template ->
template.header("Authorization", "Bearer $apiKey")
template.header("Content-Type", "application/json")
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,35 @@ class ChatGpt(
@Value("\${openai.api.model.basic}") private val AI_BASIC_MODEL: String,
@Value("\${openai.api.model.advanced}") private val AI_ADVANCED_MODEL: String,
) {
/**
 * Sends the summary prompt generated for a single [news] item to the basic model.
 *
 * @param news the news item passed to `promptGenerator.createSummaryPrompt`
 * @return the model's answer parsed as a [JsonObject]
 */
// NOTE(review): diff view — the old one-line form and the new wrapped form both appear below.
fun summarizeNews(news: News): JsonObject = doAsk(promptGenerator.createSummaryPrompt(news), AI_BASIC_MODEL)

fun summarizeNews(news: News): JsonObject =
doAsk(promptGenerator.createSummaryPrompt(news), AI_BASIC_MODEL)
/**
 * Sends the grouping prompt generated for [newsList] to the advanced model.
 *
 * @param newsList the news items passed to `promptGenerator.createGroupingPrompt`
 * @return the model's answer parsed as a [JsonObject]
 */
// NOTE(review): diff view — the old one-line form and the new wrapped form both appear below.
fun groupNews(newsList: List<News>): JsonObject = doAsk(promptGenerator.createGroupingPrompt(newsList), AI_ADVANCED_MODEL)

fun groupNews(newsList: List<News>): JsonObject =
doAsk(promptGenerator.createGroupingPrompt(newsList), AI_ADVANCED_MODEL)
/**
 * Sends the summary prompt generated for a news [group] to the basic model.
 *
 * @param group the grouped news passed to `promptGenerator.createSummaryPrompt`
 * @return the model's answer parsed as a [JsonObject]
 */
// NOTE(review): diff view — the old one-line form and the new wrapped form both appear below.
fun summarizeNewsGroup(group: GroupNews): JsonObject = doAsk(promptGenerator.createSummaryPrompt(group), AI_BASIC_MODEL)

fun summarizeNewsGroup(group: GroupNews): JsonObject =
doAsk(promptGenerator.createSummaryPrompt(group), AI_BASIC_MODEL)

/**
 * Sends the refine prompt generated for a news [group] to the basic model.
 *
 * @param group the grouped news passed to `promptGenerator.createRefinePrompt`
 * @return the model's answer parsed as a [JsonObject]
 */
// NOTE(review): diff view — the new wrapped form and the old one-line form both appear below.
fun refineSummarizedNewsGroup(group: GroupNews): JsonObject =
doAsk(promptGenerator.createRefinePrompt(group), AI_BASIC_MODEL)
fun refineSummarizedNewsGroup(group: GroupNews): JsonObject = doAsk(promptGenerator.createRefinePrompt(group), AI_BASIC_MODEL)

/**
 * Shared OpenAI request handling that returns the result as JSON.
 *
 * Builds an [OpenAiRequest] from [aiModel] and [prompt], sends it through `openAiClient`, takes the
 * trimmed message content of the first choice in the response, and parses that text into a
 * [JsonObject] with `fewGson`.
 *
 * @param prompt the messages placed in the request's `messages` field
 * @param aiModel the model name placed in the request's `model` field
 * @return the first choice's content parsed as a [JsonObject]
 * @throws Exception when the response has no first choice, message or content
 */
// NOTE(review): diff view — the old and new forms of the changed lines both appear below.
private fun doAsk(prompt: List<Map<String, String>>, aiModel: String): JsonObject {
val request = OpenAiRequest(
model = aiModel,
messages = prompt
)
private fun doAsk(
prompt: List<Map<String, String>>,
aiModel: String,
): JsonObject {
val request =
OpenAiRequest(
model = aiModel,
messages = prompt,
)

val response = openAiClient.send(request)
// Only the first choice is used; the error message below reads "Could not find the summary result."
val resultContent = response.choices.firstOrNull()?.message?.content?.trim()
?: throw Exception("요약 결과를 찾을 수 없습니다.")
val resultContent =
response.choices
.firstOrNull()
?.message
?.content
?.trim()
?: throw Exception("요약 결과를 찾을 수 없습니다.")

// The content string itself is parsed as a JSON object.
return fewGson.fromJson(resultContent, JsonObject::class.java)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package com.few.domain.generator.core

import com.few.domain.generator.core.model.News
import com.google.gson.Gson
import org.springframework.stereotype.Component
import com.google.gson.reflect.TypeToken
import io.github.oshai.kotlinlogging.KotlinLogging
import kotlinx.coroutines.CoroutineScope
Expand All @@ -11,14 +10,14 @@ import kotlinx.coroutines.Dispatchers
import kotlinx.coroutines.async
import kotlinx.coroutines.sync.Semaphore
import kotlinx.coroutines.sync.withPermit
import org.springframework.stereotype.Component
import java.io.File

@Component
class Extractor(
private val chatGpt: ChatGpt,
private val fewGson: Gson,
) {

private val log = KotlinLogging.logger {}

fun loadContentFromJson(inputFilePath: String): List<News> {
Expand All @@ -33,28 +32,33 @@ class Extractor(
return fewGson.fromJson(file.readText(), type)
}

suspend fun extractAndSaveNews(inputFilePath: String, outputFilePath: String): Int {
suspend fun extractAndSaveNews(
inputFilePath: String,
outputFilePath: String,
): Int {
val newsModels = loadContentFromJson(inputFilePath)
val semaphore = Semaphore(5) // 최대 동시 실행 개수 제한
val routines = mutableListOf<Deferred<Unit>>()

for (newsModel in newsModels) {
val routine = CoroutineScope(Dispatchers.IO).async {
semaphore.withPermit {
try {
val summarizedNews = chatGpt.summarizeNews(newsModel)
newsModel.summary = summarizedNews.get("summary")?.asString ?: "요약을 생성할 수 없습니다."
newsModel.importantSentences = if (summarizedNews.has("important_sentences")) {
val sentencesJsonArray = summarizedNews.getAsJsonArray("important_sentences")
sentencesJsonArray.mapNotNull { it.asString }
} else {
emptyList()
val routine =
CoroutineScope(Dispatchers.IO).async {
semaphore.withPermit {
try {
val summarizedNews = chatGpt.summarizeNews(newsModel)
newsModel.summary = summarizedNews.get("summary")?.asString ?: "요약을 생성할 수 없습니다."
newsModel.importantSentences =
if (summarizedNews.has("important_sentences")) {
val sentencesJsonArray = summarizedNews.getAsJsonArray("important_sentences")
sentencesJsonArray.mapNotNull { it.asString }
} else {
emptyList()
}
} catch (e: Exception) {
log.error { "${newsModel.title}에 대한 요약 중 오류 발생: ${e.message}" }
}
} catch (e: Exception) {
log.error { "${newsModel.title}에 대한 요약 중 오류 발생: ${e.message}" }
}
}
}
routines.add(routine)
}

Expand All @@ -68,7 +72,10 @@ class Extractor(
return newsModels.size
}

fun saveNewsToJson(newsList: List<News>, outputFilePath: String) {
fun saveNewsToJson(
newsList: List<News>,
outputFilePath: String,
) {
// List<NewsModel>을 JSON 문자열로 변환
val newsData = newsList.map { it.toMap() }
val jsonString = fewGson.toJson(newsData)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ class GroupNewsSummarizer(
) {
private val log = KotlinLogging.logger {}

fun summarizeAndSaveGroupedNews(inputFilePath: String, outputFilePath: String): Int {
fun summarizeAndSaveGroupedNews(
inputFilePath: String,
outputFilePath: String,
): Int {
val groupedNews = loadGroupedNews(inputFilePath)

for ((index, group) in groupedNews.withIndex()) {
Expand Down Expand Up @@ -53,14 +56,17 @@ class GroupNewsSummarizer(
return fewGson.fromJson(sectionData, SectionContent::class.java)
}

/**
 * Serializes [groupedNews] to JSON and writes it to [outputFilePath] as UTF-8 text.
 *
 * Each group is first converted with `toMap()`, and the resulting list is serialized with `fewGson`.
 *
 * @param groupedNews the groups to persist
 * @param outputFilePath path of the file that receives the JSON text
 */
// NOTE(review): diff view — the old and new forms of the changed lines both appear below.
private fun saveSummariesToJson(groupedNews: List<GroupNews>, outputFilePath: String) {
private fun saveSummariesToJson(
groupedNews: List<GroupNews>,
outputFilePath: String,
) {
// Convert the list of GroupNewsModel objects into a list of Maps
val groupNewsData = groupedNews.map { it.toMap() }

// Save as a JSON file
File(outputFilePath).writeText(
fewGson.toJson(groupNewsData),
Charsets.UTF_8
Charsets.UTF_8,
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@ import io.github.oshai.kotlinlogging.KotlinLogging
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.springframework.stereotype.Component
import java.util.regex.Pattern
import java.io.File
import java.time.LocalDateTime
import java.time.format.DateTimeFormatter
import java.util.regex.Pattern

@Component
class NaverNewsCrawler(
Expand All @@ -20,7 +20,10 @@ class NaverNewsCrawler(
private val log = KotlinLogging.logger {}
// Pattern source for article links: `https://n.news.naver.com/mnews/article/<digits>/<digits>`,
// with the second digit run anchored to the end of the string.
private val regex_news_links = "https://n\\.news\\.naver\\.com/mnews/article/\\d+/\\d+$"
// Header map holding a single desktop Chrome User-Agent string.
// NOTE(review): presumably applied to the Jsoup connection in getSoup — that part is not visible in this hunk.
// NOTE(review): diff view — the old one-line `mapOf(...)` and the new wrapped form both appear below.
private val headers =
mapOf("User-Agent" to "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36")
mapOf(
"User-Agent" to
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
)

private fun getSoup(url: String): Document {
val connection = Jsoup.connect(url)
Expand All @@ -30,8 +33,10 @@ class NaverNewsCrawler(
return connection.get()
}

/**
 * Builds the Naver News listing URL string, interpolating [sid] into `sid1` and [page] into `page`.
 *
 * NOTE(review): `date` and `page` come after the `#` in the URL, so they are part of the fragment
 * rather than the query string; fragments are normally not sent to the server — confirm that paging
 * actually takes effect.
 *
 * @param sid value interpolated into the `sid1` parameter
 * @param page value interpolated into the `page` parameter
 * @return the assembled URL string
 */
// NOTE(review): diff view — the old and new forms of the signature both appear below.
private fun makeUrl(sid: Int, page: Int) =
"https://news.naver.com/main/main.naver?mode=LSD&mid=shm&sid1=$sid#&date=%2000:00:00&page=$page"
private fun makeUrl(
sid: Int,
page: Int,
) = "https://news.naver.com/main/main.naver?mode=LSD&mid=shm&sid1=$sid#&date=%2000:00:00&page=$page"

fun getNaverNewsUrls(sid: Int): List<String> {
log.info { "$sid 분야의 뉴스 링크를 수집합니다." }
Expand All @@ -43,10 +48,11 @@ class NaverNewsCrawler(

// Regex to match the desired link pattern
val pattern = Pattern.compile(regex_news_links)
val links = soup.select("a[href]").mapNotNull { element ->
val href = element.attr("href")
if (pattern.matcher(href).matches()) href else null
}
val links =
soup.select("a[href]").mapNotNull { element ->
val href = element.attr("href")
if (pattern.matcher(href).matches()) href else null
}

allLinks.addAll(links)

Expand All @@ -66,10 +72,14 @@ class NaverNewsCrawler(

val title = soup.selectFirst("#title_area > span")
val date =
soup.selectFirst("#ct > div.media_end_head.go_trans > div.media_end_head_info.nv_notrans > div.media_end_head_info_datestamp > div:nth-child(1) > span")
soup.selectFirst(
"#ct > div.media_end_head.go_trans > div.media_end_head_info.nv_notrans > div.media_end_head_info_datestamp > div:nth-child(1) > span",
)
val content = soup.selectFirst("#dic_area")
val linkElement =
soup.selectFirst("#ct > div.media_end_head.go_trans > div.media_end_head_info.nv_notrans > div.media_end_head_info_datestamp > a.media_end_head_origin_link")
soup.selectFirst(
"#ct > div.media_end_head.go_trans > div.media_end_head_info.nv_notrans > div.media_end_head_info_datestamp > a.media_end_head_origin_link",
)
val originalLink = linkElement?.attr("href")

// TODO 원본 데이터 DB 저장으로 변경
Expand All @@ -82,31 +92,33 @@ class NaverNewsCrawler(
val dateStr = date.text().trim()
val dateParts = dateStr.split(" ")

val dateTime: LocalDateTime = if (dateParts.size == 3) {
val dateOnly = dateParts[0]
val amPm = dateParts[1]
val time = dateParts[2]

val (hour, minute) = time.split(":").map { it.toInt() }
val adjustedHour = when {
amPm == "오후" && hour != 12 -> hour + 12
amPm == "오전" && hour == 12 -> 0
else -> hour
val dateTime: LocalDateTime =
if (dateParts.size == 3) {
val dateOnly = dateParts[0]
val amPm = dateParts[1]
val time = dateParts[2]

val (hour, minute) = time.split(":").map { it.toInt() }
val adjustedHour =
when {
amPm == "오후" && hour != 12 -> hour + 12
amPm == "오전" && hour == 12 -> 0
else -> hour
}

val dateTimeStr = "$dateOnly ${"%02d".format(adjustedHour)}:${"%02d".format(minute)}"
LocalDateTime.parse(dateTimeStr, DateTimeFormatter.ofPattern("yyyy.MM.dd. HH:mm"))
} else {
LocalDateTime.parse(dateStr, DateTimeFormatter.ofPattern("yyyy.MM.dd. HH:mm"))
}

val dateTimeStr = "$dateOnly ${"%02d".format(adjustedHour)}:${"%02d".format(minute)}"
LocalDateTime.parse(dateTimeStr, DateTimeFormatter.ofPattern("yyyy.MM.dd. HH:mm"))
} else {
LocalDateTime.parse(dateStr, DateTimeFormatter.ofPattern("yyyy.MM.dd. HH:mm"))
}

return originalLink?.let {
News(
title = title.text().trim(),
content = content.text().trim(),
date = dateTime,
link = url,
originalLink = it
originalLink = it,
)
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ package com.few.domain.generator.core

import com.few.domain.generator.core.model.GroupNews
import com.few.domain.generator.core.model.News
import java.io.File
import com.google.gson.Gson
import com.google.gson.JsonObject
import com.google.gson.reflect.TypeToken
import io.github.oshai.kotlinlogging.KotlinLogging
import org.springframework.stereotype.Component
import java.io.File

@Component
class NewsGrouper(
Expand All @@ -16,7 +16,10 @@ class NewsGrouper(
) {
private val log = KotlinLogging.logger {}

fun groupAndSaveNews(inputFilePath: String, outputFilePath: String) {
fun groupAndSaveNews(
inputFilePath: String,
outputFilePath: String,
) {
val newsList = loadSummarizedNews(inputFilePath)

log.info { "뉴스 그룹화 진행 중..." }
Expand Down Expand Up @@ -62,10 +65,11 @@ class NewsGrouper(

// 뉴스가 3개 이상인 경우만 추가
if (newsInGroup.size >= 3) {
val groupNews = GroupNews(
topic = group.getAsJsonPrimitive("topic").asString,
news = newsInGroup
)
val groupNews =
GroupNews(
topic = group.getAsJsonPrimitive("topic").asString,
news = newsInGroup,
)
result.add(groupNews)
log.info { "groupNewsIds: $groupNewsIds" }
}
Expand Down
Loading

0 comments on commit b35558f

Please sign in to comment.