# Author: Praveen Arvind Krisna
# Student ID (NIM): 13219022
# University: Institut Teknologi Bandung
# Date of work: 20/11/2020
# Dataset source: TakeMeOut
# Data flow: Data Observation - Data Cleaning - Data Splitting - Model Development
import pandas as pd
import array as ar
import numpy as np
from scipy import stats

CSV_PATH = 'takemeout.csv'

# Respondent whose potential partners are analysed.
TARGET_NAME = 'P********************'

# Survey question headers mapped to the short column names used below.
COLUMN_NAMES = {
    'Siapa nama kamu?': 'nama',
    'Cewek atau cowok nih?': 'gender',
    'Seberapa penting quality time bareng calon pacar untuk kamu?': 'time',
    'Seberapa penting physical touch sama calon pacar untuk kamu?': 'touch',
    'Seberapa penting word of affirmation dari calon pacar untuk kamu?': 'word',
    'Seberapa penting dapet kado dari calon pacar untuk kamu?': 'kado',
    'Seberapa penting bantuan dari calon pacar untuk kamu?': 'help',
}

# Columns that identify a respondent; every other column is a survey score.
ID_COLUMNS = ['nama', 'gender']


def lookup_person(frame, name):
    """Return ``(gender, values)`` for the first respondent called *name*.

    ``values`` is a ``(1, n_scores)`` array holding that respondent's
    score columns, ready to be passed to ``kneighbors`` or
    ``match_positions``.

    Raises:
        KeyError: if no row of *frame* has ``nama == name``.
    """
    rows = frame[frame['nama'] == name]
    if rows.empty:
        raise KeyError(f"No respondent named {name!r}")
    # Gender and scores are both taken from the same (first) row so they
    # cannot disagree when a name occurs more than once.
    gender = rows['gender'].iloc[0]
    values = rows.drop(columns=ID_COLUMNS).to_numpy()[:1]
    return gender, values


def match_positions(target, candidates, tolerance=0):
    """Return row positions of *candidates* that match *target*.

    A row matches when every score differs from the corresponding score
    in *target* by at most *tolerance* (``0`` means identical scores).
    The result holds positions (for ``.iloc``), not index labels, and is
    empty when *candidates* has no rows.
    """
    wanted = np.asarray(target, dtype=float).reshape(1, -1)
    scores = np.asarray(candidates, dtype=float).reshape(-1, wanted.shape[1])
    within = np.abs(scores - wanted) <= tolerance
    return np.flatnonzero(within.all(axis=1))


def fit_neighbors(values, n_neighbors=5):
    """Fit a nearest-neighbour model on *values*.

    The neighbour count is capped at the number of rows so that a small
    group does not make ``kneighbors`` fail. Returns ``None`` when
    *values* has no rows.
    """
    # Imported here so the statistics and matching helpers stay usable
    # without scikit-learn installed.
    from sklearn.neighbors import NearestNeighbors

    n_rows = len(values)
    if n_rows == 0:
        return None
    model = NearestNeighbors(n_neighbors=min(n_neighbors, n_rows))
    return model.fit(np.asarray(values))


def print_matches(candidates, positions):
    """Print the name of each matched row, or a notice when none match."""
    for position in positions:
        print(candidates.iloc[[position]]['nama'])
    if len(positions) == 0:
        print("Tidak ada pasangan yang cocok sekali")


if __name__ == "__main__":
    df = pd.read_csv(CSV_PATH)

    # --- Data Cleaning ---
    print(df.head())
    df = df.rename(columns=COLUMN_NAMES)
    df = df.drop('Timestamp', axis=1)

    # --- Data Splitting ---
    df_m = df[df['gender'] == "Cowok"]
    m_val = df_m.drop(ID_COLUMNS, axis=1)
    df_f = df[df['gender'] == "Cewek"]
    f_val = df_f.drop(ID_COLUMNS, axis=1)

    # --- Basic statistics grouped by gender ---
    # The name column is dropped first: it is not a score, and averaging a
    # string column raises TypeError on recent pandas.
    per_gender = df.drop(columns=['nama']).groupby('gender')
    print(per_gender.mean())     # mean of each score per gender
    print(per_gender.min())      # minimum of each score per gender
    print(per_gender.max())      # maximum of each score per gender
    print(per_gender.median())   # median of each score per gender
    # Most frequent value of every column from gender through help.
    print(df.drop(columns=['nama']).mode())
    print(len(m_val))            # number of male respondents
    print(len(f_val))            # number of female respondents

    # --- Model Development ---
    m_coup = fit_neighbors(m_val)
    f_coup = fit_neighbors(f_val)

    p, a = lookup_person(df, TARGET_NAME)

    # Candidates are always the opposite gender of the target respondent.
    if p == "Cowok":
        df_comp, matcher = df_f, f_coup
    elif p == "Cewek":
        df_comp, matcher = df_m, m_coup
    else:
        raise ValueError(f"Unexpected gender value: {p!r}")
    df_def = df_comp.drop(ID_COLUMNS, axis=1)

    # The five closest candidates by overall score distance.
    hasil = np.empty((1, 0), dtype=int)
    if matcher is not None:
        __, hasil = matcher.kneighbors(a)
    # kneighbors returns row positions within the frame the model was
    # fitted on, so they must index the candidate frame, not the full df.
    print(df_comp.iloc[hasil[0]]['nama'])

    # Candidates whose scores are exactly the same as the target's.
    print_matches(df_comp, match_positions(a, df_def))

    # Candidates whose every score is within 1 point of the target's.
    print_matches(df_comp, match_positions(a, df_def, tolerance=1))
+2020/10/31 3:39:25 PM GMT+7,A**************,Cowok,5,5,4,1,3 +2020/10/31 3:39:36 PM GMT+7,L****,Cewek,5,5,3,2,2 +2020/10/31 3:39:38 PM GMT+7,Y***********,Cowok,4,3,4,4,4 +2020/10/31 3:39:42 PM GMT+7,a***,Cowok,5,5,5,2,3 +2020/10/31 3:39:43 PM GMT+7,B****,Cowok,5,5,5,2,4 +2020/10/31 3:39:44 PM GMT+7,M*********,Cowok,5,5,4,2,2 +2020/10/31 3:39:45 PM GMT+7,F***********************************,Cowok,4,2,5,3,4 +2020/10/31 3:39:47 PM GMT+7,F***,Cowok,3,3,4,2,4 +2020/10/31 3:39:50 PM GMT+7,R******************,Cowok,5,2,4,1,4 +2020/10/31 3:40:05 PM GMT+7,s******,Cowok,4,1,1,5,5 +2020/10/31 3:40:07 PM GMT+7,A,Cowok,5,4,5,2,3 +2020/10/31 3:40:08 PM GMT+7,R**,Cowok,3,4,3,3,5 +2020/10/31 3:40:12 PM GMT+7,D***,Cowok,5,4,3,3,4 +2020/10/31 3:40:14 PM GMT+7,F***,Cowok,1,3,5,1,5 +2020/10/31 3:40:20 PM GMT+7,P****,Cowok,4,3,4,5,5 +2020/10/31 3:40:21 PM GMT+7,A*,Cowok,4,3,5,4,4 +2020/10/31 3:40:22 PM GMT+7,M***********************,Cowok,1,1,1,1,1 +2020/10/31 3:40:25 PM GMT+7,J****************,Cowok,5,4,5,3,5 +2020/10/31 3:40:27 PM GMT+7,J***,Cowok,5,3,4,3,4 +2020/10/31 3:40:28 PM GMT+7,M**************,Cewek,5,5,5,5,5 +2020/10/31 3:40:31 PM GMT+7,F*************,Cewek,4,2,3,1,3 +2020/10/31 3:40:31 PM GMT+7,b***********,Cowok,5,4,3,2,3 +2020/10/31 3:40:31 PM GMT+7,u***,Cowok,5,3,3,5,5 +2020/10/31 3:40:34 PM GMT+7,B***,Cewek,5,2,4,5,4 +2020/10/31 3:40:35 PM GMT+7,h***,Cowok,5,4,4,4,3 +2020/10/31 3:40:36 PM GMT+7,E***,Cewek,3,2,3,1,5 +2020/10/31 3:40:36 PM GMT+7,q*,Cowok,5,5,5,5,5 +2020/10/31 3:40:37 PM GMT+7,v****,Cewek,3,3,3,3,3 +2020/10/31 3:40:40 PM GMT+7,R****,Cowok,5,3,3,2,3 +2020/10/31 3:40:41 PM GMT+7,D****,Cowok,4,5,4,2,4 +2020/10/31 3:40:42 PM GMT+7,A*************,Cowok,3,3,3,3,3 +2020/10/31 3:40:42 PM GMT+7,B**************,Cowok,3,2,4,1,5 +2020/10/31 3:40:42 PM GMT+7,S************,Cowok,2,2,2,1,1 +2020/10/31 3:40:43 PM GMT+7,H***,Cewek,4,2,2,2,5 +2020/10/31 3:40:43 PM GMT+7,J*****,Cowok,5,3,4,3,4 +2020/10/31 3:40:44 PM GMT+7,R****,Cowok,5,3,5,5,5 +2020/10/31 3:40:45 PM 
GMT+7,a******,Cowok,3,1,3,1,2 +2020/10/31 3:40:45 PM GMT+7,R******************,Cowok,5,3,3,1,5 +2020/10/31 3:40:47 PM GMT+7,:*,Cowok,5,2,2,2,2 +2020/10/31 3:40:49 PM GMT+7,L*********,Cowok,4,3,3,5,3 +2020/10/31 3:40:51 PM GMT+7,I***,Cowok,4,5,5,3,5 +2020/10/31 3:40:51 PM GMT+7,P******,Cowok,3,3,3,2,4 +2020/10/31 3:40:52 PM GMT+7,G**,Cowok,4,3,3,4,4 +2020/10/31 3:40:54 PM GMT+7,S**,Cowok,5,3,5,4,5 +2020/10/31 3:40:56 PM GMT+7,V***,Cowok,5,2,3,2,4 +2020/10/31 3:40:57 PM GMT+7,G************************,Cowok,5,5,5,5,5 +2020/10/31 3:40:58 PM GMT+7,A********************,Cewek,1,1,1,1,1 +2020/10/31 3:41:00 PM GMT+7,H***********,Cewek,4,1,3,1,5 +2020/10/31 3:41:02 PM GMT+7,R*************,Cowok,4,3,4,3,4 +2020/10/31 3:41:03 PM GMT+7,A*****,Cewek,5,3,4,4,5 +2020/10/31 3:41:04 PM GMT+7,I*,Cowok,3,1,1,1,4 +2020/10/31 3:41:05 PM GMT+7,i****,Cowok,5,1,5,1,3 +2020/10/31 3:41:06 PM GMT+7,C**********,Cowok,5,4,4,3,3 +2020/10/31 3:41:08 PM GMT+7,r******,Cowok,5,5,5,5,5 +2020/10/31 3:41:09 PM GMT+7,H****,Cewek,1,1,1,1,1 +2020/10/31 3:41:09 PM GMT+7,A*************,Cowok,4,4,5,3,5 +2020/10/31 3:41:09 PM GMT+7,K**********,Cowok,4,3,4,2,3 +2020/10/31 3:41:10 PM GMT+7,P********************,Cowok,3,3,3,3,3 +2020/10/31 3:41:11 PM GMT+7,T***************************,Cowok,5,3,4,3,5 +2020/10/31 3:41:11 PM GMT+7,A***************************,Cowok,1,1,2,1,4 +2020/10/31 3:41:12 PM GMT+7,M************,Cowok,5,3,4,2,4 +2020/10/31 3:41:12 PM GMT+7,a*****,Cowok,4,3,3,2,4 +2020/10/31 3:41:14 PM GMT+7,S,Cewek,3,2,2,3,4 +2020/10/31 3:41:14 PM GMT+7,A******,Cowok,5,4,4,4,3 +2020/10/31 3:41:15 PM GMT+7,M*,Cewek,5,1,5,3,5 +2020/10/31 3:41:16 PM GMT+7,w****,Cewek,1,1,1,1,3 +2020/10/31 3:41:19 PM GMT+7,a***,Cowok,4,3,5,3,4 +2020/10/31 3:41:20 PM GMT+7,L*******,Cowok,5,5,5,3,4 +2020/10/31 3:41:21 PM GMT+7,f*****,Cowok,5,5,5,3,4 +2020/10/31 3:41:22 PM GMT+7,N****,Cowok,5,4,4,3,4 +2020/10/31 3:41:22 PM GMT+7,a******,Cewek,5,4,5,5,5 +2020/10/31 3:41:23 PM GMT+7,A*************,Cowok,3,1,4,2,5 +2020/10/31 3:41:26 
PM GMT+7,N*****,Cewek,4,1,3,2,4 +2020/10/31 3:41:28 PM GMT+7,R***,Cewek,4,4,4,3,5 +2020/10/31 3:41:28 PM GMT+7,R****,Cowok,4,3,5,4,4 +2020/10/31 3:41:28 PM GMT+7,A********,Cowok,5,5,5,5,5 +2020/10/31 3:41:29 PM GMT+7,m*,Cowok,1,1,1,1,1 +2020/10/31 3:41:34 PM GMT+7,g*****,Cowok,4,3,4,1,4 +2020/10/31 3:41:40 PM GMT+7,A********,Cowok,4,4,4,4,4 +2020/10/31 3:41:40 PM GMT+7,a********************************************************,Cewek,4,4,5,3,2 +2020/10/31 3:41:40 PM GMT+7,R****,Cowok,5,5,4,3,3 +2020/10/31 3:41:49 PM GMT+7,t*****,Cowok,5,2,4,3,4 +2020/10/31 3:41:52 PM GMT+7,d****,Cowok,5,5,5,1,5 +2020/10/31 3:41:59 PM GMT+7,j****,Cowok,4,3,3,2,3 +2020/10/31 3:42:02 PM GMT+7,W*,Cowok,5,3,4,3,3 +2020/10/31 3:42:04 PM GMT+7,a**,Cowok,4,3,3,3,4 +2020/10/31 3:42:05 PM GMT+7,I*********,Cowok,4,5,5,4,4 +2020/10/31 3:42:08 PM GMT+7,A**********,Cowok,5,3,4,3,5 +2020/10/31 3:42:16 PM GMT+7,u*********************,Cowok,5,5,5,3,3 +2020/10/31 3:42:21 PM GMT+7,D***,Cowok,5,3,4,2,3 +2020/10/31 3:42:22 PM GMT+7,a*************,Cowok,4,3,4,3,4 +2020/10/31 3:42:27 PM GMT+7,M**,Cowok,5,4,4,2,5 +2020/10/31 3:42:37 PM GMT+7,D********,Cowok,5,3,5,1,3 +2020/10/31 3:42:37 PM GMT+7,F******,Cewek,5,4,5,4,4 +2020/10/31 3:42:45 PM GMT+7,B***,Cowok,1,5,1,1,1 +2020/10/31 3:42:46 PM GMT+7,N****,Cowok,5,4,4,4,5 +2020/10/31 3:42:57 PM GMT+7,A********,Cowok,5,1,4,2,3 +2020/10/31 3:43:02 PM GMT+7,K************,Cewek,4,5,4,4,3 +2020/10/31 3:43:04 PM GMT+7,M*,Cowok,5,5,5,5,4 +2020/10/31 3:43:11 PM GMT+7,K********************,Cowok,1,1,1,1,1 +2020/10/31 3:43:22 PM GMT+7,B***********,Cowok,5,5,5,5,4