-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp1.py
114 lines (90 loc) · 4.38 KB
/
app1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# -*- coding: utf-8 -*-
"""app1.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/12mKen8V8UQH6UcQ7d7jSUtOFeVt3ojqI
"""
import numpy as np
import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# Function to calculate similarity scores between mentees and mentors
def _skills_as_text(frame):
    """Return the ``Skills`` column of *frame* as a list of strings."""
    # Missing values become empty text so the vectorizer only ever sees str.
    return ["" if pd.isna(value) else str(value) for value in frame['Skills']]


def calculate_similarity(mentee_df, mentor_df):
    """Return the mentee-by-mentor cosine similarity of TF-IDF skill vectors.

    A single TF-IDF vocabulary (English stop words removed) is fitted on the
    mentee and mentor skill texts together, so both sides are embedded in the
    same feature space before they are compared.

    Args:
        mentee_df: DataFrame with one row per mentee and a ``Skills`` column.
        mentor_df: DataFrame with one row per mentor and a ``Skills`` column.

    Returns:
        A 2-D array of shape ``(len(mentee_df), len(mentor_df))`` where entry
        ``[r, c]`` is the similarity between the ``r``-th mentee and the
        ``c``-th mentor (by row position). The array is all zeros when either
        frame has no rows or when no usable skill text was supplied.
    """
    mentee_skills = _skills_as_text(mentee_df)
    mentor_skills = _skills_as_text(mentor_df)
    all_skills = mentee_skills + mentor_skills

    # Fallback result: nothing to compare means nobody is similar to anybody.
    no_similarity = np.zeros((len(mentee_skills), len(mentor_skills)))

    if not mentee_skills or not mentor_skills:
        return no_similarity
    if not any(text.strip() for text in all_skills):
        # Every text is blank; fitting would fail with an empty vocabulary.
        return no_similarity

    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(all_skills)
    except ValueError:
        # Raised when the texts contain only stop words / no tokens, i.e. the
        # vocabulary is empty. Treat it like blank input instead of crashing.
        return no_similarity

    # The mentee rows come first in the stacked matrix, mentors follow.
    split_at = len(mentee_skills)
    mentee_tfidf = tfidf_matrix[:split_at]
    mentor_tfidf = tfidf_matrix[split_at:]

    return cosine_similarity(mentee_tfidf, mentor_tfidf)
# Function to match mentees with mentors based on similarity scores
def match_mentor_mentee(mentees, mentors, similarity_scores):
    """Pair every mentee with the mentor that has the highest similarity score.

    Each mentee is matched independently, so the same mentor may be selected
    for several mentees.

    Args:
        mentees: DataFrame with ``Name``, ``Skills`` and ``Need`` columns.
        mentors: DataFrame with ``Name``, ``Skills`` and ``Availability``
            columns.
        similarity_scores: 2-D array whose row ``r`` holds the scores of the
            ``r``-th mentee against every mentor, both addressed by row
            position (not by index label).

    Returns:
        DataFrame with one row per mentee describing the chosen mentor and the
        winning score. It is empty (but still has the result columns) when
        there are no mentees or no mentors.
    """
    columns = [
        "Mentee Name",
        "Mentor Name",
        "Mentee Skills",
        "Mentor Skills",
        "Need",
        "Availability",
        "Similarity Score",
    ]

    # Without mentors there is nothing to pick; argmax would fail on an
    # empty score row.
    if len(mentors) == 0:
        return pd.DataFrame(columns=columns)

    matches = []
    # iterrows() yields index *labels*, which only coincide with row positions
    # for a default RangeIndex. The score matrix is positional, so count rows
    # explicitly instead of reusing the label.
    for row_pos, (_, mentee) in enumerate(mentees.iterrows()):
        row_scores = similarity_scores[row_pos]
        best_match_idx = row_scores.argmax()
        best_mentor = mentors.iloc[best_match_idx]
        matches.append({
            "Mentee Name": mentee['Name'],
            "Mentor Name": best_mentor['Name'],
            "Mentee Skills": mentee['Skills'],
            "Mentor Skills": best_mentor['Skills'],
            "Need": mentee['Need'],
            "Availability": best_mentor['Availability'],
            "Similarity Score": row_scores[best_match_idx],
        })

    return pd.DataFrame(matches, columns=columns)
# Streamlit UI
st.title("Mentor-Mentee Matching with Similarity Scores")

# --- Mentee input form ------------------------------------------------------
st.header("Enter Mentee Data")
mentee_data = []
num_mentees = st.number_input("How many mentees?", min_value=1, max_value=10, value=1)
for i in range(num_mentees):
    # Explicit keys keep the repeated widgets distinct from one another.
    mentee_name = st.text_input(f"Mentee Name {i+1}", key=f"mentee_name_{i}")
    mentee_skills = st.text_input(f"Mentee Skills {i+1}", key=f"mentee_skills_{i}")
    mentee_need = st.text_input(f"Mentee Need {i+1}", key=f"mentee_need_{i}")
    mentee_data.append([mentee_name, mentee_skills, mentee_need])

mentee_df = pd.DataFrame(mentee_data, columns=["Name", "Skills", "Need"])

# --- Mentor input form ------------------------------------------------------
st.header("Enter Mentor Data")
mentor_data = []
num_mentors = st.number_input("How many mentors?", min_value=1, max_value=10, value=1)
for i in range(num_mentors):
    mentor_name = st.text_input(f"Mentor Name {i+1}", key=f"mentor_name_{i}")
    mentor_skills = st.text_input(f"Mentor Skills {i+1}", key=f"mentor_skills_{i}")
    mentor_availability = st.selectbox(
        f"Mentor Availability {i+1}",
        ["Full-time", "Part-time"],
        key=f"mentor_availability_{i}",
    )
    mentor_data.append([mentor_name, mentor_skills, mentor_availability])

mentor_df = pd.DataFrame(mentor_data, columns=["Name", "Skills", "Availability"])

# --- Matching ---------------------------------------------------------------
if st.button("Match Mentors and Mentees"):
    # Both forms always contribute at least one row, so DataFrame.empty on the
    # raw frames can never detect missing input. Drop rows whose skills are
    # blank first; the reset index keeps row labels equal to row positions.
    filled_mentees = mentee_df[mentee_df["Skills"].str.strip() != ""].reset_index(drop=True)
    filled_mentors = mentor_df[mentor_df["Skills"].str.strip() != ""].reset_index(drop=True)

    if not filled_mentees.empty and not filled_mentors.empty:
        similarity_scores = calculate_similarity(filled_mentees, filled_mentors)
        # Each mentee gets the mentor with the highest similarity score.
        matches_df = match_mentor_mentee(filled_mentees, filled_mentors, similarity_scores)
        if not matches_df.empty:
            # Show 1-based row numbers in the rendered table.
            matches_df.index = matches_df.index + 1
            st.write("Matching Results:")
            st.dataframe(matches_df)
            # NOTE(review): this button lives inside the st.button branch;
            # clicking it presumably reruns the script and hides the results
            # again -- confirm against Streamlit's button behavior.
            st.download_button(
                label="Download Matched Results",
                data=matches_df.to_csv(index=False),
                file_name="mentor_mentee_matches.csv",
                mime="text/csv"
            )
        else:
            st.write("No matches found based on the given criteria.")
    else:
        st.write("Please enter both mentee and mentor data.")