Skip to content

Commit

Permalink
Merge branch 'Development' into 65-implementacao-do-dockerr
Browse files Browse the repository at this point in the history
  • Loading branch information
Davi-KLevy authored Jul 29, 2024
2 parents 96f5a74 + 75ebd2a commit 8a1db1c
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 1 deletion.
32 changes: 32 additions & 0 deletions forunb/main/management/commands/scraping_sigaa.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Raspagem de dados das disciplinas do site Sigaa da UnB e salva como fóruns

from django.core.management.base import BaseCommand
from main.models import Forum
from main.scraping import DisciplineWebScraper

class Command(BaseCommand):
    """Scrape UnB Sigaa discipline listings and store each discipline as a Forum.

    Improvement over the original: the department ids, year and period were
    hard-coded; they are now optional command-line arguments whose defaults
    preserve the original values, so existing invocations behave identically.
    """

    help = 'Raspa dados das disciplinas do site Sigaa da UnB e salva como fóruns'

    # Defaults preserved from the original hard-coded values.
    DEFAULT_DEPARTMENTS = ["518", "524", "673"]
    DEFAULT_YEAR = "2024"
    DEFAULT_PERIOD = "1"

    def add_arguments(self, parser):
        # Optional overrides make the command reusable for other terms/departments.
        parser.add_argument('--departments', nargs='+', default=self.DEFAULT_DEPARTMENTS,
                            help='Department ids to scrape (default: %(default)s)')
        parser.add_argument('--year', default=self.DEFAULT_YEAR,
                            help='Academic year (default: %(default)s)')
        parser.add_argument('--period', default=self.DEFAULT_PERIOD,
                            help='Academic period (default: %(default)s)')

    def handle(self, *args, **kwargs):
        departments = kwargs.get('departments') or self.DEFAULT_DEPARTMENTS
        year = kwargs.get('year') or self.DEFAULT_YEAR
        period = kwargs.get('period') or self.DEFAULT_PERIOD

        # WARNING: destructive — every existing forum is removed before re-scraping.
        Forum.objects.all().delete()
        self.stdout.write(self.style.WARNING('Todos os fóruns antigos foram removidos.'))

        for department in departments:
            scraper = DisciplineWebScraper(department, year, period)
            disciplines = scraper.get_disciplines()

            for code, names in disciplines.items():
                for name in names:
                    title = f"{code} - {name}"
                    # get_or_create keeps titles unique across departments.
                    forum, created = Forum.objects.get_or_create(
                        title=title,
                        defaults={'description': ''}
                    )
                    if created:
                        self.stdout.write(self.style.SUCCESS(f'Fórum "{title}" criado com sucesso.'))
                    else:
                        self.stdout.write(self.style.WARNING(f'Fórum "{title}" já existe.'))
18 changes: 18 additions & 0 deletions forunb/main/migrations/0003_alter_forum_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.13 on 2024-07-29 02:42

from django.db import migrations, models


class Migration(migrations.Migration):
    """Auto-generated migration: allow Forum.description to be blank or NULL.

    Matches the model change that made the description field optional.
    Do not edit by hand beyond documentation — generated by Django 4.2.13.
    """

    dependencies = [
        ('main', '0002_remove_answer_forum_answer_author_answer_description_and_more'),
    ]

    operations = [
        migrations.AlterField(
            model_name='forum',
            name='description',
            field=models.TextField(blank=True, null=True),
        ),
    ]
2 changes: 1 addition & 1 deletion forunb/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

class Forum(models.Model):
title = models.CharField(max_length=100)
description = models.TextField()
description = models.TextField(blank=True, null=True)
created_at = models.DateTimeField(auto_now_add=True)

def __str__(self):
Expand Down
89 changes: 89 additions & 0 deletions forunb/main/scraping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Obtendo os dados das disciplinas da UnB

from .sessions import URL, HEADERS, create_request_session, get_session_cookie, get_response
from bs4 import BeautifulSoup
from collections import defaultdict
from typing import List, Optional
import requests

def get_list_of_departments(response=None) -> Optional[List]:
    """Return the list of UnB department ids from the class-listing page.

    :param response: an HTTP response for the listing page; when omitted, the
        page is fetched with a fresh session.
    :return: list of department id strings, or None if the select element is
        not present in the page.

    Fix: the original default argument ``get_response(create_request_session())``
    was evaluated at import time, firing an HTTP request on module import and
    freezing a single response for every default call. The request is now made
    lazily, only when no response is supplied.
    """
    if response is None:
        response = get_response(create_request_session())

    soup = BeautifulSoup(response.content, "html.parser")
    departments = soup.find("select", attrs={"id": "formTurma:inputDepto"})

    if departments is None:
        return None

    options_tag = departments.find_all("option")
    # "0" is the placeholder "choose a department" option — skip it.
    department_ids = [option["value"] for option in options_tag if option["value"] != "0"]

    return department_ids

class DisciplineWebScraper:
    """Scrape the disciplines offered by one UnB department in a given term.

    Results accumulate in ``self.disciplines``, a mapping from discipline code
    to the list of discipline names parsed from Sigaa's public listing page.
    """

    def __init__(self, department: str, year: str, period: str, url=URL, session=None, cookie=None):
        self.disciplines: defaultdict[str, List[str]] = defaultdict(list)
        self.department = department
        self.period = period
        self.year = year
        self.url = url
        # JSF form payload; field names mirror Sigaa's search form.
        self.data = {
            "formTurma": "formTurma",
            "formTurma:inputNivel": "",
            "formTurma:inputDepto": self.department,
            "formTurma:inputAno": self.year,
            "formTurma:inputPeriodo": self.period,
            "formTurma:j_id_jsp_1370969402_11": "Buscar",
            "javax.faces.ViewState": "j_id1"
        }

        self.session = session if session is not None else create_request_session()
        self.cookie = cookie if cookie is not None else get_session_cookie(self.session)
        self.response = None  # cached POST response; filled lazily

    def get_response_from_disciplines_post_request(self) -> requests.Response:
        """POST the search form, cache the response on ``self.response`` and return it.

        Fix: the original declared a ``requests.Response`` return type but
        returned nothing; the cached response is now actually returned.
        """
        self.response = self.session.post(
            self.url,
            headers=HEADERS,
            cookies=self.cookie,
            data=self.data
        )
        return self.response

    def make_disciplines(self, rows) -> None:
        """Populate ``self.disciplines`` from the ``<tr>`` tags of the listing table.

        Fix: the original annotated ``rows: str`` although ``rows`` is an
        iterable of bs4 tags, and looked the title span up twice per row.
        """
        if rows is None or not len(rows):
            return None

        aux_title_and_code = ""

        for discipline in rows:
            title = discipline.find("span", attrs={"class": "tituloDisciplina"})
            if title is not None:
                # Header row: remember "CODE - Name" for the class rows below it.
                aux_title_and_code = title.get_text().strip('-')
            elif "linhaPar" in discipline.get("class", []) or "linhaImpar" in discipline.get("class", []):
                # Class row: register the discipline under its header's code.
                code, name = aux_title_and_code.split(' - ', 1)
                self.disciplines[code].append(name)

    def retrieve_classes_tables(self, response):
        """Return the listing ``<table>`` from the response, or None when absent."""
        soup = BeautifulSoup(response.content, "html.parser")
        # find() already yields None when the table is missing — no extra branch needed.
        return soup.find("table", attrs={"class": "listagem"})

    def make_web_scraping_of_disciplines(self, response) -> None:
        """Parse one response's listing table into ``self.disciplines``."""
        tables = self.retrieve_classes_tables(response)

        if not tables:
            return None

        self.make_disciplines(tables.find_all("tr"))

    def get_disciplines(self) -> defaultdict[str, List[str]]:
        """Fetch the listing (at most once) and return code -> list-of-names."""
        if not self.response:
            self.get_response_from_disciplines_post_request()
        self.make_web_scraping_of_disciplines(self.response)

        return self.disciplines
27 changes: 27 additions & 0 deletions forunb/main/sessions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import requests

# Public Sigaa endpoint that lists the classes offered per department/term.
URL = "https://sigaa.unb.br/sigaa/public/turmas/listar.jsf"
# Browser-like headers: the form POST expects urlencoded data, and a desktop
# User-Agent avoids being served a degraded/blocked page.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.5",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "TE": "Trailers",
}

def create_request_session():
    """Build a requests session pre-loaded with the browser-like default headers."""
    new_session = requests.Session()
    new_session.headers.update(HEADERS)
    return new_session

def get_session_cookie(session):
    """GET the listing page once and return the cookies the server sets."""
    return session.get(URL).cookies

def get_response(session=None):
    """GET the class-listing page, creating a fresh session when none is given."""
    active_session = create_request_session() if session is None else session
    return active_session.get(URL)
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
asgiref==3.8.1
Babel==2.15.0
beautifulsoup4==4.12.3
bs4==0.0.2  # NOTE(review): bs4 is only a shim that installs beautifulsoup4, which is already pinned above — redundant; verify and remove
certifi==2024.7.4
charset-normalizer==3.3.2
click==8.1.7
Expand All @@ -21,6 +23,7 @@ paginate==0.5.6
pathspec==0.12.1
platformdirs==4.2.2
pygments==2.18.0
pkg_resources==0.0.0  # NOTE(review): bogus `pip freeze` artifact — not an installable package; remove or regenerate requirements
pymdown-extensions==10.8.1
python-dateutil==2.9.0.post0
pytz==2024.1
Expand All @@ -35,3 +38,6 @@ tzdata==2024.1
urllib3==2.2.2
watchdog==4.0.1
zipp==3.19.2
soupsieve==2.5
style==1.1.0  # NOTE(review): likely installed by accident (`pip install style`) — not referenced by this project; verify and remove
update==0.0.1  # NOTE(review): likely installed by accident — not referenced by this project; verify and remove

0 comments on commit 8a1db1c

Please sign in to comment.