-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_parsing.py
29 lines (27 loc) · 1.15 KB
/
test_parsing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import requests
from bs4 import BeautifulSoup
def _select_text(node, selector, default="N/A"):
    """Return the text of the first element under *node* matching *selector*.

    ``select_one`` returns ``None`` when nothing matches, so *default* is
    returned in that case instead of raising ``AttributeError`` on
    ``None.get_text()``.
    """
    match = node.select_one(selector)
    return default if match is None else match.get_text()


def test_parsing():
    """Fetch the KrebsOnSecurity front page and print the first post's fields.

    Downloads the page, parses it with BeautifulSoup's ``lxml`` parser,
    reports how many ``div.post`` elements were found and, if there is at
    least one, prints the title, author, date, summary and link of the first.
    A field whose selector matches nothing is printed as ``N/A``. A non-200
    response is reported on stdout instead of being parsed.

    Raises:
        requests.RequestException: if the request fails or times out.
    """
    url = 'https://krebsonsecurity.com/'
    # Without a timeout, requests waits indefinitely on an unresponsive server.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        print(f"Failed to fetch the webpage. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.content, 'lxml')
    print("BeautifulSoup parsing successful")
    posts = soup.select('div.post')
    print(f"Number of posts found: {len(posts)}")
    if not posts:
        return

    first_post = posts[0]
    # Look the title link up once; it supplies both the title and the URL.
    title_link = first_post.select_one('h2.entry-title a')
    if title_link is None:
        title = link = "N/A"
    else:
        title = title_link.get_text()
        # .get() avoids a KeyError when the anchor has no href attribute.
        link = title_link.get('href', "N/A")
    author = _select_text(first_post, 'span.byline a')
    date = _select_text(first_post, 'time')
    summary = _select_text(first_post, 'div.entry-summary p')

    print("First post details:")
    print(f"Title: {title}")
    print(f"Author: {author}")
    print(f"Date: {date}")
    print(f"Summary: {summary}")
    print(f"Link: {link}")
# Script entry point: run the parsing check only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_parsing()