-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathnaver_sports.py
42 lines (27 loc) · 1004 Bytes
/
naver_sports.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import html
import json
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from constants import *
def main_board(limit=10, timeout=10):
    """Collect absolute URLs of the leading articles on the sports news board.

    Fetches the page at ``naver_sports_news`` (a name supplied by the
    ``constants`` star-import) and gathers the ``href`` of every
    ``<a class="title">`` anchor, in page order.

    Args:
        limit: Maximum number of article links to return. Defaults to 10;
            ``None`` returns every matching link.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A list of absolute article URLs (at most ``limit`` entries).
    """
    res = requests.get(naver_sports_news, timeout=timeout)
    soup = BeautifulSoup(res.content, 'html.parser')
    # The attribute filter must map attribute name -> value; a set such as
    # {'class', 'title'} is not an attribute mapping. ``'href': True`` keeps
    # only anchors that actually carry a link, so the lookup below cannot
    # raise KeyError.
    anchors = soup.find_all('a', {'class': 'title', 'href': True})[:limit]
    # urljoin resolves site-relative paths against the host and leaves
    # already-absolute hrefs intact, unlike plain string concatenation.
    return [
        urljoin('https://sports.news.naver.com', anchor['href'])
        for anchor in anchors
    ]
def parse_article(link, timeout=10):
    """Download one article page and return its title and an HTML snippet.

    The title is read from the first ``<h4 class="title">`` element and the
    body from the first ``<div class="news_end">`` element; newlines are
    removed from the body text.

    Args:
        link: URL of the article page to fetch.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` waits indefinitely on a stalled connection.

    Returns:
        A ``(title, snippet)`` tuple. ``title`` is the raw title text.
        ``snippet`` is ``'<b>' + title + '</b><br><br>' + body`` with the
        title and body HTML-escaped so scraped text cannot break or inject
        markup.

    Raises:
        AttributeError: If the page does not contain the expected title or
            body element.
    """
    res = requests.get(link, timeout=timeout)
    soup = BeautifulSoup(res.content, 'html.parser')
    # The attribute filter must map attribute name -> value; a set such as
    # {'class', 'title'} is not an attribute mapping.
    title_node = soup.find('h4', {'class': 'title'})
    body_node = soup.find('div', {'class': 'news_end'})
    if title_node is None or body_node is None:
        # Accessing ``.text`` on a missing node raises AttributeError anyway;
        # raise the same type explicitly so the message names the page.
        raise AttributeError(
            'expected article markup not found at {!r}'.format(link)
        )
    article_title = title_node.text
    article_content = body_node.text.replace("\n", "")
    # ``.text`` yields decoded plain text, so '&', '<' and '>' must be
    # re-escaped before being embedded in markup. Quotes are left alone
    # because the text only appears in element content, never in attributes.
    snippet = (
        '<b>' + html.escape(article_title, quote=False) + '</b><br><br>'
        + html.escape(article_content, quote=False)
    )
    return article_title, snippet
def sports_start():
    """Fetch every article listed by ``main_board`` and split the results.

    Each URL returned by ``main_board()`` is passed to ``parse_article``,
    which yields a ``(title, content)`` pair.

    Returns:
        A ``(titles, contents)`` tuple of two lists, both in the same order
        as the URLs returned by ``main_board()``. Both lists are empty when
        there are no URLs.
    """
    # Parse the articles one by one, in board order.
    parsed = [parse_article(link) for link in main_board()]
    # Separate the (title, content) pairs into two parallel lists.
    headlines = [headline for headline, _ in parsed]
    bodies = [body for _, body in parsed]
    return headlines, bodies