-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'Development' into 65-implementacao-do-dockerr
- Loading branch information
Showing
6 changed files
with
173 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
# Raspagem de dados das disciplinas do site Sigaa da UnB e salva como fóruns | ||
|
||
from django.core.management.base import BaseCommand | ||
from main.models import Forum | ||
from main.scraping import DisciplineWebScraper | ||
|
||
class Command(BaseCommand):
    """Rebuild the forums from the disciplines scraped from UnB's SIGAA site.

    One forum titled ``"<code> - <name>"`` is created for every discipline
    returned by ``DisciplineWebScraper``. All previously stored forums are
    removed first, but only after the scrape has succeeded and produced data.
    """

    help = 'Raspa dados das disciplinas do site Sigaa da UnB e salva como fóruns'

    # Defaults reproduce the values that used to be hard-coded in handle().
    DEFAULT_DEPARTMENTS = ("518", "524", "673")
    DEFAULT_YEAR = "2024"
    DEFAULT_PERIOD = "1"

    def add_arguments(self, parser):
        """Register optional overrides for the departments, year and period."""
        parser.add_argument(
            '--departments',
            nargs='+',
            default=list(self.DEFAULT_DEPARTMENTS),
            help='Códigos dos departamentos a serem raspados.',
        )
        parser.add_argument(
            '--year',
            default=self.DEFAULT_YEAR,
            help='Ano letivo a ser raspado.',
        )
        parser.add_argument(
            '--period',
            default=self.DEFAULT_PERIOD,
            help='Período letivo a ser raspado.',
        )

    def handle(self, *args, **kwargs):
        """Scrape every requested department, then replace the stored forums.

        The scrape runs before anything is deleted, and the delete/create
        steps share one transaction, so a network failure or a database
        error cannot leave the forum table empty or half-filled.
        """
        # Local import keeps this block self-contained; django is already a
        # dependency of this module.
        from django.db import transaction

        # Fall back to the defaults when handle() is invoked without options.
        departments = kwargs.get('departments') or list(self.DEFAULT_DEPARTMENTS)
        year = kwargs.get('year') or self.DEFAULT_YEAR
        period = kwargs.get('period') or self.DEFAULT_PERIOD

        # 1) Collect every title first; no database writes happen here.
        titles = []
        for department in departments:
            scraper = DisciplineWebScraper(department, year, period)
            disciplines = scraper.get_disciplines()
            for code, names in disciplines.items():
                for name in names:
                    titles.append(f"{code} - {name}")

        # 2) Refuse to wipe existing data when the scrape produced nothing.
        if not titles:
            self.stderr.write(self.style.ERROR(
                'Nenhuma disciplina encontrada; os fóruns existentes foram mantidos.'
            ))
            return

        # 3) Replace the forums atomically.
        with transaction.atomic():
            Forum.objects.all().delete()
            self.stdout.write(self.style.WARNING('Todos os fóruns antigos foram removidos.'))

            for title in titles:
                _, created = Forum.objects.get_or_create(
                    title=title,
                    defaults={'description': ''},
                )
                if created:
                    self.stdout.write(self.style.SUCCESS(f'Fórum "{title}" criado com sucesso.'))
                else:
                    self.stdout.write(self.style.WARNING(f'Fórum "{title}" já existe.'))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Generated by Django 4.2.13 on 2024-07-29 02:42 | ||
|
||
from django.db import migrations, models | ||
|
||
|
||
class Migration(migrations.Migration):
    """Alter ``Forum.description`` to a ``TextField`` that accepts blank and NULL."""

    # Must run after the previous migration of the ``main`` app.
    dependencies = [
        (
            'main',
            '0002_remove_answer_forum_answer_author_answer_description_and_more',
        ),
    ]

    operations = [
        migrations.AlterField(
            model_name='forum',
            name='description',
            field=models.TextField(null=True, blank=True),
        ),
    ]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
# Obtendo os dados das disciplinas da UnB | ||
|
||
from .sessions import URL, HEADERS, create_request_session, get_session_cookie, get_response | ||
from bs4 import BeautifulSoup | ||
from collections import defaultdict | ||
from typing import List, Optional | ||
import requests | ||
|
||
def get_list_of_departments(response=None) -> Optional[List]:
    """Return the department ids offered by the SIGAA class-search form.

    Args:
        response: An already fetched response for the search page. When
            omitted, the page is requested at call time. (The request used
            to be the default-argument expression, which made an HTTP call
            as a side effect of importing this module and reused that one
            response for the whole process lifetime.)

    Returns:
        The ``value`` of every ``<option>`` in the department ``<select>``,
        excluding the placeholder ``"0"``, or ``None`` when the page has no
        such ``<select>``.
    """
    if response is None:
        response = get_response(create_request_session())

    soup = BeautifulSoup(response.content, "html.parser")
    select_tag = soup.find("select", attrs={"id": "formTurma:inputDepto"})

    if select_tag is None:
        return None

    # .get() skips options without a "value" attribute instead of raising
    # KeyError on them.
    values = (option.get("value") for option in select_tag.find_all("option"))
    return [value for value in values if value is not None and value != "0"]
|
||
class DisciplineWebScraper:
    """Scrape the disciplines listed by SIGAA for one department/year/period.

    The scraper posts the class-search form at ``url`` and stores, under each
    discipline code, the discipline name once per class row found in the
    result table.
    """

    # Seconds to wait for the server before giving up instead of hanging.
    REQUEST_TIMEOUT = 30

    def __init__(self, department: str, year: str, period: str, url=URL, session=None, cookie=None):
        """Store the search parameters and prepare the HTTP session.

        Args:
            department: Department id sent as ``formTurma:inputDepto``.
            year: Year sent as ``formTurma:inputAno``.
            period: Period sent as ``formTurma:inputPeriodo``.
            url: Address the form is posted to.
            session: Session to reuse; a new one is created when ``None``.
            cookie: Cookies to send; fetched with the session when ``None``.
        """
        self.disciplines: defaultdict[str, List[str]] = defaultdict(list)
        self.department = department
        self.period = period
        self.year = year
        self.url = url
        # Form payload posted to the search page.
        self.data = {
            "formTurma": "formTurma",
            "formTurma:inputNivel": "",
            "formTurma:inputDepto": self.department,
            "formTurma:inputAno": self.year,
            "formTurma:inputPeriodo": self.period,
            "formTurma:j_id_jsp_1370969402_11": "Buscar",
            "javax.faces.ViewState": "j_id1"
        }

        self.session = session if session is not None else create_request_session()
        self.cookie = cookie if cookie is not None else get_session_cookie(self.session)
        self.response = None
        # Response whose rows are already in self.disciplines; prevents
        # get_disciplines() from appending the same rows twice.
        self._parsed_response = None

    def get_response_from_disciplines_post_request(self) -> requests.Response:
        """Post the search form, store the response on ``self`` and return it."""
        self.response = self.session.post(
            self.url,
            headers=HEADERS,
            cookies=self.cookie,
            data=self.data,
            timeout=self.REQUEST_TIMEOUT,
        )
        return self.response

    def make_disciplines(self, rows: Optional[List]) -> None:
        """Fill ``self.disciplines`` from the ``<tr>`` rows of the result table.

        A row holding a ``span.tituloDisciplina`` sets the current
        ``"<code> - <name>"`` title; every following ``linhaPar`` or
        ``linhaImpar`` row appends that name under that code.
        """
        if not rows:
            return None

        current_title = ""

        for row in rows:
            title_tag = row.find("span", attrs={"class": "tituloDisciplina"})
            if title_tag is not None:
                # Only leading/trailing hyphens are removed here.
                current_title = title_tag.get_text().strip('-')
                continue

            row_classes = row.get("class", [])
            if "linhaPar" in row_classes or "linhaImpar" in row_classes:
                code, separator, name = current_title.partition(' - ')
                if not separator:
                    # Class row seen before any title, or a title without
                    # the "code - name" shape: skip it rather than raise
                    # ValueError on unpacking.
                    continue
                self.disciplines[code].append(name)

    def retrieve_classes_tables(self, response):
        """Return the first ``table.listagem`` in the response, or ``None``."""
        soup = BeautifulSoup(response.content, "html.parser")
        return soup.find("table", attrs={"class": "listagem"})

    def make_web_scraping_of_disciplines(self, response) -> None:
        """Parse ``response`` and record its disciplines; no-op without a table."""
        table = self.retrieve_classes_tables(response)

        if table is None:
            return None

        self.make_disciplines(table.find_all("tr"))

    def get_disciplines(self) -> defaultdict[str, List[str]]:
        """Return the scraped disciplines, fetching and parsing when needed.

        The form is posted only when no response has been stored yet, and a
        given response is parsed once, so calling this method repeatedly
        does not duplicate entries.
        """
        # ``is None`` rather than truthiness: a requests.Response is falsy
        # when its status is an error, which would trigger a new POST on
        # every call.
        if self.response is None:
            self.get_response_from_disciplines_post_request()

        if self._parsed_response is not self.response:
            self.make_web_scraping_of_disciplines(self.response)
            self._parsed_response = self.response

        return self.disciplines
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
import requests | ||
|
||
# Address requested by get_session_cookie() and get_response().
URL = "https://sigaa.unb.br/sigaa/public/turmas/listar.jsf"

# Headers installed on every session built by create_request_session().
HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/91.0.4472.124 Safari/537.36"
    ),
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,*/*;q=0.8"
    ),
    "Accept-Language": "en-US,en;q=0.5",
    "Connection": "keep-alive",
    "Upgrade-Insecure-Requests": "1",
    "TE": "Trailers",
}
|
||
def create_request_session():
    """Build a ``requests.Session`` whose default headers include ``HEADERS``."""
    http_session = requests.Session()
    # Merge the module-level headers into the session defaults.
    http_session.headers.update(HEADERS)
    return http_session
|
||
def get_session_cookie(session, timeout=30):
    """Request ``URL`` with ``session`` and return the cookies of the response.

    Args:
        session: Object exposing ``get(url, timeout=...)``, such as the
            session built by ``create_request_session``.
        timeout: Seconds to wait for the server. Without a timeout the
            request could block indefinitely on an unresponsive server.

    Returns:
        The ``cookies`` attribute of the response.
    """
    response = session.get(URL, timeout=timeout)
    return response.cookies
|
||
def get_response(session=None, timeout=30):
    """Request ``URL`` and return the response.

    Args:
        session: Session to use; a new one is created with
            ``create_request_session`` when ``None``.
        timeout: Seconds to wait for the server. Without a timeout the
            request could block indefinitely on an unresponsive server.

    Returns:
        The response of the GET request.
    """
    if session is None:
        session = create_request_session()
    return session.get(URL, timeout=timeout)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters