-
Notifications
You must be signed in to change notification settings - Fork 63
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #147 from gwydion67/master
Fetch the latest Academic Calendar, parse its data and generate the ics file
- Loading branch information
Showing
6 changed files
with
268 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,4 +8,8 @@ data.txt | |
.idea/ | ||
.vscode | ||
venv | ||
.env | ||
.env | ||
|
||
ACADEMIC_CALENDAR_*.pdf | ||
Academic_Cal-j/** | ||
final.json |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,58 @@ | ||
beautifulsoup4==4.12.2 | ||
google_api_python_client==2.90.0 | ||
blinker==1.8.2 | ||
bs4==0.0.2 | ||
cachetools==5.5.0 | ||
certifi==2024.8.30 | ||
cffi==1.17.1 | ||
chardet==5.2.0 | ||
charset-normalizer==3.4.0 | ||
click==8.1.7 | ||
cryptography==43.0.1 | ||
et-xmlfile==1.1.0 | ||
Flask==3.0.3 | ||
Flask-Cors==4.0.1 | ||
ghostscript==0.7 | ||
google-api-core==2.21.0 | ||
google-api-python-client==2.90.0 | ||
google-auth==2.35.0 | ||
google-auth-httplib2==0.2.0 | ||
google-auth-oauthlib==1.2.1 | ||
googleapis-common-protos==1.65.0 | ||
gunicorn==22.0.0 | ||
httplib2==0.22.0 | ||
icalendar==5.0.7 | ||
idna==3.10 | ||
iitkgp_erp_login==2.4.2 | ||
itsdangerous==2.2.0 | ||
Jinja2==3.1.4 | ||
MarkupSafe==3.0.1 | ||
numpy==2.1.2 | ||
oauth2client==4.1.3 | ||
oauthlib==3.2.2 | ||
opencv-python==4.10.0.84 | ||
openpyxl==3.1.5 | ||
packaging==24.1 | ||
pandas==2.2.3 | ||
pdfminer.six==20240706 | ||
proto-plus==1.24.0 | ||
protobuf==5.28.2 | ||
pyasn1==0.6.1 | ||
pyasn1_modules==0.4.1 | ||
pycparser==2.22 | ||
pyparsing==3.2.0 | ||
pypdf==4.3.1 | ||
pypdf_table_extraction==0.0.2 | ||
python-dateutil==2.9.0.post0 | ||
pytz==2023.3 | ||
Requests==2.31.0 | ||
flask==3.0.3 | ||
flask_cors==4.0.1 | ||
gunicorn==22.0.0 | ||
requests==2.31.0 | ||
requests-oauthlib==2.0.0 | ||
rsa==4.9 | ||
setuptools==75.1.0 | ||
six==1.16.0 | ||
soupsieve==2.6 | ||
tabulate==0.9.0 | ||
tk==0.1.0 | ||
tzdata==2024.2 | ||
uritemplate==4.1.1 | ||
urllib3==2.2.3 | ||
Werkzeug==3.0.4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
beautifulsoup4==4.12.2 | ||
google_api_python_client==2.90.0 | ||
httplib2==0.22.0 | ||
icalendar==5.0.7 | ||
iitkgp_erp_login==2.4.2 | ||
oauth2client==4.1.3 | ||
pytz==2023.3 | ||
Requests==2.31.0 | ||
flask==3.0.3 | ||
flask_cors==4.0.1 | ||
gunicorn==22.0.0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
from utils.dates import * | ||
from utils.build_event import * | ||
from utils.academic_calander_handler import * | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,191 @@ | ||
from dataclasses import dataclass, field
from datetime import datetime, timedelta
import glob
import json
import os
import re
import shutil
from zipfile import ZipFile

import camelot
import requests
|
||
|
||
JSON_FOLDER_NAME = 'Academic_Cal-j' | ||
|
||
@dataclass
class DataEntry:
    """One academic-calendar event together with its start and end dates."""

    # default_factory is evaluated on every instantiation, so an entry created
    # without explicit dates defaults to "now" rather than to the moment this
    # module happened to be imported.
    start_date: datetime = field(default_factory=datetime.today)
    end_date: datetime = field(default_factory=datetime.today)
    # Human-readable description of the event.
    event: str = ""
|
||
def cwd():
    """Return the process's current working directory as a string."""
    current_dir = os.getcwd()
    return current_dir
|
||
def get_latest_calendar_name():
    """Build the file name of the academic calendar PDF for the current session.

    Before July the session is taken to have started in the previous calendar
    year. The name has the form ``ACADEMIC_CALENDAR_<start>_<end>.pdf`` where
    ``<end>`` is the last two digits of the start year plus one.

    Returns:
        The PDF file name as a string.
    """
    year = datetime.today().year
    month = datetime.today().month

    # January-June still belong to the session that began the year before.
    start_year = year - 1 if month < 7 else year
    end_suffix = (start_year % 100) + 1

    return f"ACADEMIC_CALENDAR_{start_year}_{end_suffix}.pdf"
|
||
def is_file_present(file):
    """Report whether ``file`` exists below the current working directory.

    Args:
        file: Path of a file or directory, relative to the working directory.

    Returns:
        True if the path exists, either as given or with a trailing slash
        appended; False otherwise.
    """
    candidate = cwd() + '/' + file
    return os.path.exists(candidate) or os.path.exists(candidate + '/')
|
||
def delete_file(file):
    """Delete a file or directory located in the current working directory.

    Args:
        file: Name of the file or directory, relative to the working directory.

    Returns:
        True if the path is gone afterwards (it was deleted, or it was not
        present to begin with); False if it exists but could not be removed.
    """
    if not is_file_present(file):
        print(file, "File not present..")
        return True

    print("DELETING file ", file)
    try:
        if os.path.isdir(file):
            # Directories are removed recursively together with their contents.
            shutil.rmtree(cwd() + '/' + file)
        elif os.path.isfile(file):
            os.remove(file)
        else:
            # Exists, but is neither a regular file nor a directory.
            raise OSError("filename not valid")
    except OSError as e:
        print("ERROR: seems file already exists but cannot be deleted")
        print(e)
        return False

    return True
|
||
def get_latest_calendar():
    """Fetch the latest academic calendar PDF from the IIT KGP website.

    The PDF is saved in the current working directory under the name returned
    by ``get_latest_calendar_name()``. Any previously downloaded copy with
    that name is deleted first.

    Returns:
        True if the PDF was downloaded and is present on disk; False if the
        request failed, timed out, or the server answered with an HTTP error
        status.
    """
    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    # Delete any old academic calendar PDF so a stale copy is never parsed.
    if is_file_present(filename):
        delete_file(filename)

    # Download before opening the output file: a failed request must not leave
    # an empty file or an HTML error page behind under a ".pdf" name.
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as e:
        print("ERROR: could not download ", url)
        print(e)
        return False

    with open(filename, "wb") as file:
        file.write(response.content)

    return is_file_present(filename)
|
||
def upzip_and_delete_zip(zip_file_name, result_folder_name):
    """Extract a zip archive into a folder, then delete the archive.

    Args:
        zip_file_name: Path of the zip archive to extract.
        result_folder_name: Directory that receives the extracted members.

    Returns:
        False if extraction raised (the error is printed and the archive is
        left in place); True once extraction succeeded and deletion of the
        archive has been attempted.
    """
    extracted = True
    with ZipFile(zip_file_name) as archive:
        try:
            archive.extractall(result_folder_name)
        except Exception as err:
            print(err)
            extracted = False

    if not extracted:
        return False

    print("Zip File not needed anymore, Deleteting ", zip_file_name)
    delete_file(zip_file_name)
    return True
|
||
def export_json():
    """Extract the tables of the calendar PDF and export them as JSON files.

    The PDF named by ``get_latest_calendar_name()`` is read with camelot, any
    previous ``JSON_FOLDER_NAME`` folder is deleted, the tables are exported
    as a compressed JSON bundle, and that bundle is unpacked into
    ``JSON_FOLDER_NAME``.

    Returns:
        True if the export and the extraction of the resulting zip both
        succeeded; False otherwise.
    """
    filename = get_latest_calendar_name()
    # NOTE: per the original author, a "read_pdf not found" warning on this
    # line can be ignored.
    tables = camelot.read_pdf(filename, pages="all")

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    try:
        tables.export(JSON_FOLDER_NAME + '.json', f='json', compress=True)
    except Exception as E:
        print(E)
        return False

    # Report an extraction failure to the caller instead of claiming success
    # unconditionally.
    return upzip_and_delete_zip(JSON_FOLDER_NAME + '.zip', JSON_FOLDER_NAME)
|
||
def get_json_files():
    """List the exported table JSON files in a stable, natural order.

    Returns:
        Paths of every ``*.json`` file inside ``JSON_FOLDER_NAME`` (hidden
        files included), sorted so that embedded numbers compare numerically,
        or an empty list when the folder does not exist.
    """
    if not is_file_present(JSON_FOLDER_NAME):
        return []

    folder_path = cwd() + '/' + JSON_FOLDER_NAME
    files = glob.glob(folder_path + '/*.json', include_hidden=True)

    def natural_key(path):
        # re.split with a capturing group alternates text and digit runs, so
        # odd positions are always numbers; comparing them as ints makes
        # "page-10" sort after "page-2".
        parts = re.split(r'(\d+)', os.path.basename(path))
        return [int(part) if index % 2 else part
                for index, part in enumerate(parts)]

    # glob returns matches in filesystem order, which is not guaranteed to be
    # stable; callers merge the files and depend on the row order.
    return sorted(files, key=natural_key)
|
||
def merge_json():
    """Concatenate all exported table JSON files and save the result.

    Every file returned by ``get_json_files()`` is loaded and its items are
    appended to one list (this assumes each file holds a JSON array). The
    combined list is also written to ``final.json`` in the working directory.

    Returns:
        The combined list of items.
    """
    combined = []
    for json_path in get_json_files():
        with open(json_path) as source:
            rows = json.load(source)
        combined.extend(rows)

    with open('final.json', "w") as target:
        json.dump(combined, target, indent=4)

    return combined
|
||
def get_academic_calendar() -> list[DataEntry]:
    """Download the academic calendar and parse it into dated events.

    The calendar PDF is fetched, its tables are exported to JSON and merged,
    and every row that has more than four columns and a non-empty column
    ``'4'`` becomes one ``DataEntry``:

    * the event text is column ``'1'`` (only when longer than 3 characters)
      followed by column ``'2'``, with newlines removed;
    * dates in ``dd.mm.yyyy`` form are collected from columns ``'3'`` and
      ``'4'``. One date yields a one-day event; two dates yield a start/end
      pair; any other count leaves the entry's default dates untouched.

    Parsing stops after the first row whose column ``'1'`` consists of exactly
    two words, one of which is "annual" or "convocation".

    Returns:
        The parsed entries in the order they appear in the merged data.
    """
    get_latest_calendar()
    export_json()

    all_dates = merge_json()
    # The first merged row is skipped (presumably the table header -- TODO confirm).
    all_dates = all_dates[1:]

    main_dates = []
    # The dots are escaped so only literal "dd.mm.yyyy" dates match; with a
    # bare "." a string such as "12/05/2024" would match and then make
    # strptime raise ValueError.
    date_regex = re.compile(r'\d{2}\.\d{2}\.\d{4}')

    for date in all_dates:
        if not (len(date) > 4 and date['4'] != ''):
            continue

        entry = DataEntry()
        if len(date['1']) > 3:
            entry.event += date['1'].replace('\n', '')
        entry.event += date['2'].replace('\n', '')

        raw_dates = (
            date['3'].replace('\n', ' ').replace('(AN)', '')
            + date['4'].replace('\n', ' ').replace('(AN)', '')
        )
        found = date_regex.findall(raw_dates)
        if len(found) == 1:
            entry.start_date = datetime.strptime(found[0], "%d.%m.%Y")
            entry.end_date = entry.start_date + timedelta(1)
        elif len(found) == 2:
            entry.start_date = datetime.strptime(found[0], "%d.%m.%Y")
            entry.end_date = datetime.strptime(found[1], "%d.%m.%Y")
        main_dates.append(entry)

        annual_convocation = str(date['1']).strip().lower().split(" ")
        # The source PDF may misspell "annual convocation"; accepting either
        # word in a two-word title reduces the ways this end marker can be
        # missed.
        if len(annual_convocation) == 2 and (
            "annual" in annual_convocation or "convocation" in annual_convocation
        ):
            break

    return main_dates
|