Skip to content

Commit

Permalink
Merge pull request #147 from gwydion67/master
Browse files Browse the repository at this point in the history
Fetch the latest Academic Calendar, parse its data and generate the ics file
  • Loading branch information
proffapt authored Feb 3, 2025
2 parents d2149a8 + 0837f38 commit 95d58f7
Show file tree
Hide file tree
Showing 6 changed files with 268 additions and 7 deletions.
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,8 @@ data.txt
.idea/
.vscode
venv
.env
.env

ACADEMIC_CALENDAR_*.pdf
Academic_Cal-j/**
final.json
57 changes: 52 additions & 5 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,58 @@
beautifulsoup4==4.12.2
google_api_python_client==2.90.0
blinker==1.8.2
bs4==0.0.2
cachetools==5.5.0
certifi==2024.8.30
cffi==1.17.1
chardet==5.2.0
charset-normalizer==3.4.0
click==8.1.7
cryptography==43.0.1
et-xmlfile==1.1.0
Flask==3.0.3
Flask-Cors==4.0.1
ghostscript==0.7
google-api-core==2.21.0
google-api-python-client==2.90.0
google-auth==2.35.0
google-auth-httplib2==0.2.0
google-auth-oauthlib==1.2.1
googleapis-common-protos==1.65.0
gunicorn==22.0.0
httplib2==0.22.0
icalendar==5.0.7
idna==3.10
iitkgp_erp_login==2.4.2
itsdangerous==2.2.0
Jinja2==3.1.4
MarkupSafe==3.0.1
numpy==2.1.2
oauth2client==4.1.3
oauthlib==3.2.2
opencv-python==4.10.0.84
openpyxl==3.1.5
packaging==24.1
pandas==2.2.3
pdfminer.six==20240706
proto-plus==1.24.0
protobuf==5.28.2
pyasn1==0.6.1
pyasn1_modules==0.4.1
pycparser==2.22
pyparsing==3.2.0
pypdf==4.3.1
pypdf_table_extraction==0.0.2
python-dateutil==2.9.0.post0
pytz==2023.3
Requests==2.31.0
flask==3.0.3
flask_cors==4.0.1
gunicorn==22.0.0
requests==2.31.0
requests-oauthlib==2.0.0
rsa==4.9
setuptools==75.1.0
six==1.16.0
soupsieve==2.6
tabulate==0.9.0
tk==0.1.0
tzdata==2024.2
uritemplate==4.1.1
urllib3==2.2.3
Werkzeug==3.0.4
11 changes: 11 additions & 0 deletions requirements.txt.bak
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
beautifulsoup4==4.12.2
google_api_python_client==2.90.0
httplib2==0.22.0
icalendar==5.0.7
iitkgp_erp_login==2.4.2
oauth2client==4.1.3
pytz==2023.3
Requests==2.31.0
flask==3.0.3
flask_cors==4.0.1
gunicorn==22.0.0
9 changes: 8 additions & 1 deletion timetable/generate_ics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from icalendar import Calendar, Event
from datetime import datetime, timedelta
from timetable import Course
from utils import dates, build_event_duration, generate_india_time, next_weekday
from utils import academic_calander_handler, dates, build_event_duration, generate_india_time, next_weekday

WORKING_DAYS = dates.get_dates()

Expand Down Expand Up @@ -51,6 +51,13 @@ def generate_ics(courses: list[Course], output_filename):
event.add("dtstart", holiday[1])
event.add("dtend", holiday[1] + timedelta(days=1))
cal.add_component(event)

for entry in academic_calander_handler.get_academic_calendar():
event = Event()
event.add("summary", entry.event)
event.add("dtstart",entry.start_date)
event.add("dtend",entry.end_date)
cal.add_component(event)


if output_filename != "":
Expand Down
1 change: 1 addition & 0 deletions utils/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from utils.dates import *
from utils.build_event import *
from utils.academic_calander_handler import *

191 changes: 191 additions & 0 deletions utils/academic_calander_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
import glob
import json
import os
import re
import shutil
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from zipfile import ZipFile

import camelot
import requests


# Base name used for the table export: export_json() passes '<name>.json' to
# the export call, then unpacks '<name>.zip' into a folder called '<name>'.
JSON_FOLDER_NAME = 'Academic_Cal-j'

@dataclass
class DataEntry:
    """One academic-calendar event together with the dates it spans.

    The date defaults are produced per instance through ``default_factory``.
    A plain ``datetime.today()`` default would be evaluated only once, when
    the class is defined, leaving every later instance with the same stale
    import-time timestamp.
    """

    # Start of the event.
    start_date: datetime = field(default_factory=datetime.today)
    # End of the event.
    end_date: datetime = field(default_factory=datetime.today)
    # Title of the event.
    event: str = ""

def cwd():
    """Return the current working directory of the process."""
    current_dir = os.getcwd()
    return current_dir

def get_latest_calendar_name():
    """Build the file name of the academic calendar PDF for the current date.

    From July onwards the name is based on the current year; before July it
    is based on the previous year. The result looks like
    ``ACADEMIC_CALENDAR_<year>_<two-digit year + 1>.pdf``.
    """
    today = datetime.today()
    start_year = today.year if today.month >= 7 else today.year - 1
    session = f"{start_year}_{start_year % 100 + 1}"
    return f"ACADEMIC_CALENDAR_{session}.pdf"

def is_file_present(file):
    """Tell whether ``file`` exists below the current working directory.

    ``file`` is appended to the working directory; the path is probed both
    as given and with a trailing slash.
    """
    candidate = cwd() + '/' + file
    return os.path.exists(candidate) or os.path.exists(candidate + '/')

def delete_file(file):
    """Delete a file or directory located under the current working directory.

    Args:
        file: Name of the file or directory, relative to the working
            directory.

    Returns:
        True when the entry was removed. False when it was not present, was
        neither a regular file nor a directory, or could not be removed.
    """
    if not is_file_present(file):
        print(file, "File not present..")
        return False

    print("DELETING file ", file)
    try:
        if os.path.isdir(file):
            shutil.rmtree(cwd() + '/' + file)
        elif os.path.isfile(file):
            os.remove(file)
        else:
            raise ValueError("filename not valid")
    except (OSError, ValueError) as e:
        print("ERROR: seems file already exists but cannot be deleted")
        print(e)
        return False

    # Report success explicitly; falling through would return None, which is
    # indistinguishable from failure in a truthiness check.
    return True

def get_latest_calendar():
    """Fetch the latest academic calendar PDF from the IIT KGP website.

    The PDF is saved in the current working directory under the name given
    by get_latest_calendar_name(), replacing any earlier copy.

    Returns:
        True when the PDF was downloaded and is present on disk. False when
        the server answered with an error status; in that case nothing is
        written and any earlier copy is left untouched.

    Raises:
        requests.RequestException: If the request itself fails, for example
            on a connection error or timeout.
    """
    filename = get_latest_calendar_name()
    url = 'https://www.iitkgp.ac.in/assets/pdf/' + filename

    # Download before touching the disk so that a failed request neither
    # wipes the previous copy nor stores an error page as a ".pdf".
    response = requests.get(url, timeout=30)
    if not response.ok:
        print("ERROR: could not download", url, "- HTTP status", response.status_code)
        return False

    # Delete any old academic calendar PDF if it exists.
    if is_file_present(filename):
        delete_file(filename)

    with open(filename, "wb") as file:
        file.write(response.content)

    return is_file_present(filename)

def upzip_and_delete_zip(zip_file_name, result_folder_name):
    """Extract a zip archive into a folder, then delete the archive.

    Returns True once the archive was extracted and handed to delete_file().
    Returns False, after printing the error, when extraction raised; the
    archive is kept in that case.
    """
    extracted = True
    with ZipFile(zip_file_name) as archive:
        try:
            archive.extractall(result_folder_name)
        except Exception as extract_error:
            print(extract_error)
            extracted = False

    if not extracted:
        return False

    print("Zip File not needed anymore, Deleteting ", zip_file_name)
    delete_file(zip_file_name)
    return True

def export_json():
    """Export every table of the calendar PDF as JSON files.

    The PDF named by get_latest_calendar_name() is read with camelot, any
    previous JSON_FOLDER_NAME folder is deleted, the tables are exported as
    a compressed JSON archive, and that archive is unpacked into
    JSON_FOLDER_NAME.

    Returns:
        True when the tables were exported and the archive was unpacked.
        False when the export or the extraction failed.
    """
    filename = get_latest_calendar_name()
    # NOTE: a "read_pdf not found" warning reported for this call can be ignored.
    tables = camelot.read_pdf(filename, pages="all")

    print("Checking for pre-existing folder")
    delete_file(JSON_FOLDER_NAME)

    try:
        tables.export(JSON_FOLDER_NAME + '.json', f='json', compress=True)
    except Exception as E:
        print(E)
        return False

    # Propagate the extraction result instead of reporting success regardless.
    return upzip_and_delete_zip(JSON_FOLDER_NAME + '.zip', JSON_FOLDER_NAME)

def _natural_sort_key(path):
    """Split a path into text and integer chunks so numbers sort numerically."""
    return [int(chunk) if chunk.isdigit() else chunk
            for chunk in re.split(r'(\d+)', path)]


def get_json_files():
    """List the exported JSON files inside JSON_FOLDER_NAME in a stable order.

    Returns:
        The paths of all ``*.json`` files in the folder, naturally sorted
        (embedded numbers compare numerically, so "...-2..." comes before
        "...-10..."). An empty list when the folder does not exist.
    """
    if not is_file_present(JSON_FOLDER_NAME):
        return []

    folder_path = cwd() + '/' + JSON_FOLDER_NAME
    files = glob.glob(folder_path + '/*.json', include_hidden=True)
    # glob's ordering depends on the file system, while the merged rows are
    # later processed in order, so the file order must be deterministic.
    files.sort(key=_natural_sort_key)
    return files

def merge_json():
    """Concatenate the contents of every exported JSON file.

    The combined list is also written to ``final.json`` (indented by four
    spaces) before it is returned.
    """
    combined = []
    for json_path in get_json_files():
        with open(json_path) as source:
            combined += json.load(source)

    with open('final.json', "w") as target:
        json.dump(combined, target, indent=4)

    return combined

_CALENDAR_DATE_FORMAT = "%d.%m.%Y"
# The dots are escaped so that only literal "dd.mm.yyyy" text is matched; an
# unescaped "." would also accept e.g. "25 12 2025", which strptime rejects.
_CALENDAR_DATE_REGEX = re.compile(r'\d{2}\.\d{2}\.\d{4}')


def _build_calendar_entry(row):
    """Turn one table row into a DataEntry, or None when it has no usable date."""
    title = ''
    # Column '1' is only used when it is longer than three characters.
    if len(row['1']) > 3:
        title += row['1'].replace('\n', '')
    title += row['2'].replace('\n', '')

    date_text = (row['3'].replace('\n', ' ').replace('(AN)', '')
                 + row['4'].replace('\n', ' ').replace('(AN)', ''))
    try:
        found = [datetime.strptime(text, _CALENDAR_DATE_FORMAT)
                 for text in _CALENDAR_DATE_REGEX.findall(date_text)]
    except ValueError as error:
        print("ERROR: skipping calendar row with an invalid date:", error)
        return None

    if not found:
        # No date in the row: skip it rather than inventing one.
        return None

    # The end is one day after the last date found. A single date therefore
    # yields a one-day event, and a range includes its final day.
    return DataEntry(start_date=found[0],
                     end_date=found[-1] + timedelta(1),
                     event=title)


def _is_annual_convocation(label):
    """Tell whether a two-word label contains "annual" or "convocation"."""
    # The spelling in the source PDF cannot be trusted, so matching either
    # word of a two-word label is accepted to reduce the ways this can fail.
    words = str(label).strip().lower().split(" ")
    return len(words) == 2 and ("annual" in words or "convocation" in words)


def get_academic_calendar() -> list[DataEntry]:
    """Download the latest academic calendar and return its dated events.

    The PDF is fetched and exported to JSON, and the merged rows are scanned
    in order. Only rows with more than four columns and a non-empty column
    '4' are considered. Scanning stops after the row recognised as the
    annual convocation.

    Returns:
        One DataEntry per considered row that contains at least one
        ``dd.mm.yyyy`` date. Rows without a parseable date are skipped.
    """
    get_latest_calendar()
    export_json()

    # The first merged row is dropped (presumably a header row - TODO confirm).
    rows = merge_json()[1:]

    main_dates = []
    for row in rows:
        if not (len(row) > 4 and row['4'] != ''):
            continue

        entry = _build_calendar_entry(row)
        if entry is not None:
            main_dates.append(entry)

        if _is_annual_convocation(row['1']):
            break

    return main_dates

0 comments on commit 95d58f7

Please sign in to comment.