Skip to content

Commit

Permalink
[Feat] 크롤링 데이터 변경
Browse files (browse the repository at this point in the history)
- 전시 상세 정보 -> 전시 상세 정보 Url
- 데이터베이스 ddl-auto 값 validate로 변경
  • Loading branch information
yjy8501 committed Jun 12, 2024
1 parent 6515dc9 commit 09e2ea8
Show file tree
Hide file tree
Showing 7 changed files with 15 additions and 89 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ private ExhibitionUpdateDto getExhibitionUpdateDto() throws IOException {
// 검색어 설정
String searchWord = HtmlEntityRemover.removeHtmlEntities(exhibitionInfo.getTitle());
String url = "https://www.mcst.go.kr/kor/s_culture/culture/cultureList.jsp?pSeq=&pRo=&pCurrentPage=1&pType=&pPeriod=&fromDt=&toDt=&pArea=&pSearchType=01&pSearchWord=" + searchWord;
String exhibitionSite = "https://www.mcst.go.kr/kor/s_culture/culture/";

// Jsoup을 이용해 웹 페이지에서 데이터 추출
Document doc = Jsoup.connect(url).get();
Expand All @@ -35,7 +36,7 @@ private ExhibitionUpdateDto getExhibitionUpdateDto() throws IOException {
String href = link.attr("href");
return ExhibitionUpdateDto.builder()
.exhibitionInfo(exhibitionInfo)
.url(href).build();
.url(exhibitionSite+href).build();
}
return null;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,4 @@
public class ExhibitionUpdateDto {
private ExhibitionInfo exhibitionInfo;
private String url;
private String description;
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ public class ExhibitionInfo extends BaseTimeEntity {
@Column(unique = true)
private String title;

@Column(length = 2000)
private String description;
@Column(name = "detail_info_url", length = 500)
private String detailInfoUrl;

@Column(name = "start_date")
@NotNull
Expand Down Expand Up @@ -83,11 +83,11 @@ public class ExhibitionInfo extends BaseTimeEntity {
private String progressStatus;

@Builder
public ExhibitionInfo(Long id, int seq, String title, String description, LocalDate startDate, LocalDate endDate, String place, String realmName, String area, String imageUrl, double gpsX, double gpsY, String ticketingUrl, String phone, String price, String placeAddr, String progressStatus) {
public ExhibitionInfo(Long id, int seq, String title, String detailInfoUrl, LocalDate startDate, LocalDate endDate, String place, String realmName, String area, String imageUrl, double gpsX, double gpsY, String ticketingUrl, String phone, String price, String placeAddr, String progressStatus) {
this.id = id;
this.seq = seq;
this.title = title;
this.description = description;
this.detailInfoUrl = detailInfoUrl;
this.startDate = startDate;
this.endDate = endDate;
this.place = place;
Expand All @@ -104,6 +104,6 @@ public ExhibitionInfo(Long id, int seq, String title, String description, LocalD
}

public void updateForm(ExhibitionUpdateDto exhibitionUpdateDto) {
this.description = exhibitionUpdateDto.getDescription();
this.detailInfoUrl = exhibitionUpdateDto.getUrl();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import jakarta.transaction.Transactional;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.com.artfriendlybatch.domain.exhibition.callable.ExhibitionDescriptionCallable;
import org.com.artfriendlybatch.domain.exhibition.callable.ExhibitionUrlCallable;
import org.com.artfriendlybatch.domain.exhibition.dto.ExhibitionUpdateDto;
import org.com.artfriendlybatch.domain.exhibition.entity.ExhibitionInfo;
Expand All @@ -27,7 +26,7 @@ public List<ExhibitionInfo> findExhibitionInfoByCreateTime() {
}

@Transactional
public List<ExhibitionUpdateDto> getStringList() {
public void getDetailUrl() {
List<ExhibitionUpdateDto> urls = new ArrayList<>();
ExecutorService executor = Executors.newFixedThreadPool(10); // 10개의 스레드를 가진 스레드 풀 생성

Expand All @@ -51,48 +50,17 @@ public List<ExhibitionUpdateDto> getStringList() {
log.error("연동 오류"+e.getMessage());
}
}
} catch (Exception e) {
e.printStackTrace();
} finally {
executor.shutdown(); // 모든 작업이 완료되면 스레드 풀 종료
}

return urls;
}

public void extracted(List<ExhibitionUpdateDto> exhibitionUpdateDtos) {
List<ExhibitionUpdateDto> exhibitionUpdateDtoList = new ArrayList<>();
ExecutorService executor = Executors.newFixedThreadPool(10);
List<Future<ExhibitionUpdateDto>> futures = new ArrayList<>();

try {
for (ExhibitionUpdateDto updateDto : exhibitionUpdateDtos) {
Callable<ExhibitionUpdateDto> callable = new ExhibitionDescriptionCallable(updateDto);
Future<ExhibitionUpdateDto> future = executor.submit(callable);
futures.add(future);
}

// 모든 Future 결과를 처리
for (Future<ExhibitionUpdateDto> future : futures) {
try {
ExhibitionUpdateDto result = future.get();
if (result != null) {
exhibitionUpdateDtoList.add(result);
}
} catch (ExecutionException e) {
log.error("연동 오류 "+e.getMessage());
}
}

for (ExhibitionUpdateDto updateDto : exhibitionUpdateDtoList) {
for (ExhibitionUpdateDto updateDto : urls) {
updateDto.getExhibitionInfo().updateForm(updateDto);
}

} catch (Exception e) {
Thread.currentThread().interrupt();
log.warn("future.get() Error");
e.printStackTrace();
} finally {
executor.shutdown(); // 스레드 풀 종료
executor.shutdown(); // 모든 작업이 완료되면 스레드 풀 종료
}
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ public class Task {
private final ExhibitionInfoService exhibitionInfoService;

public RepeatStatus exhibitionInfoCrawling() {
List<ExhibitionUpdateDto> urls = exhibitionInfoService.getStringList();
exhibitionInfoService.extracted(urls);
exhibitionInfoService.getDetailUrl();

return RepeatStatus.FINISHED;
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/application.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ spring:
driver-class-name: com.mysql.cj.jdbc.Driver
jpa:
hibernate:
ddl-auto: update
ddl-auto: validate
show-sql: true
properties:
hibernate:
Expand Down

0 comments on commit 09e2ea8

Please sign in to comment.