Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Qiita記事3本目] 確率的主成分分析 #125

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'matplotlib.pyplot'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-12-584a9621a47a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;31m#numpyをインストール\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;31m#グラフを書くやつ\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 7\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mmpl_toolkits\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmplot3d\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mAxes3D\u001b[0m\u001b[1;31m#3次元plotを行うためのツール\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreprocessing\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mNormalizer\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib.pyplot'"
]
}
],
"source": [
"\n",
"\n",
"#PCA:主成分分析\n",
"# Code source: Gaël Varoquaux\n",
"# License: BSD 3 clause\n",
"\n",
"import numpy as np#numpyをインストール\n",
"import matplotlib.pyplot as plt#グラフを書くやつ\n",
"from mpl_toolkits.mplot3d import Axes3D#3次元plotを行うためのツール\n",
"from sklearn.preprocessing import Normalizer\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn import decomposition#PCAのライブラリ\n",
"from sklearn import datasets#datasetsを取ってくる\n",
"\n",
"\n",
"def Estep(W,X,N,sigma):\n",
" M=np.dot(W,W.T)+sigma*np.eye(N)\n",
" Minv=np.linalg.inv(W)\n",
" Ez_n=np.dot(Minv,W.T)\n",
" Ez_n=np.dot(Ez_n,X)\n",
" Ez_nz_nt=sigma*Minv+np.dot(Ez_n,Ez_n.T)\n",
" return Ez_n,Ez_nz_nt\n",
"def Mstep(Ez_n,Ez_nz_nt,X,N,D):\n",
" a=np.dot(X,Ez_n.T)\n",
" b=np.linalg.inv(sam(Ez_nz_nt))\n",
" Wnew=np.dot(sum(a),b)\n",
" nor = np.dot(X,X.T)\n",
" tenti = np.dot(Ez_n.T,Wnew.T)\n",
" W2 = np.dot(Wnew.T,Wnew)\n",
" hoge = np.dot(Ez_nz_nt,trace)\n",
" tr = hoge.trace()\n",
" sig = nor-2*np.dot(tenti,X)+tr\n",
" signew = sum(sig)/(N*D)\n",
" return Wnew,signew\n",
"n_component=2\n",
"np.set_printoptions(suppress=True)\n",
"#centers = [[1, 1], [-1, -1], [1, -1]]#3次元配列を作ってる\n",
"#print(centers)\n",
"fig = plt.figure(2, figsize=(14, 6))\n",
"iris = datasets.load_iris()\n",
"X = iris.data\n",
"y = iris.target\n",
"N=150\n",
"D=4\n",
"M=2\n",
"W =np.random.randn(4, 2)\n",
"sigma = np.array([[1]])\n",
"for i in range(4):#各データの平均を元のデータから引いている\n",
" mean = np.mean(X[:,i])\n",
" X[:,i]=(X[:,i]-mean)\n",
"for i in range(step):\n",
" Ez_n,Ez_nz_nt = Estep(W,X,N,sigma)\n",
" W,sigma = Mstep(Ez_n,Ez_nz_nt,X,N,D)\n",
" \n",
" \n",
"'''\n",
"X_cov=np.dot(X.T,X)#共分散行列を生成\n",
"w,v=np.linalg.eig(X_cov)#共分散行列の固有値、固有ベクトルを算出(固有値は既に大きさ順に並べられている)\n",
"for i in range(n_component):#固有値の大きい順に固有ベクトルを取り出す\n",
" Xpc[i]=v[:,i]\n",
"Xpc=np.array(Xpc)\n",
"'''\n",
"Xafter=np.dot(W,X.T)#取り出した固有ベクトルでデータを線形写像する\n",
"\n",
"\n",
"pca = decomposition.PCA(n_components=2)\n",
"pca.fit(X)\n",
"Xlib = pca.transform(X) \n",
"\n",
"\n",
"ax1 = fig.add_subplot(121)\n",
"for label in np.unique(y):\n",
" ax1.scatter(Xlib[y == label, 0],\n",
" Xlib[y == label, 1])\n",
"ax1.set_title('library pca')\n",
"ax1.set_xlabel(\"X_axis\")\n",
"ax1.set_ylabel(\"Y_axis\")\n",
"ax2 = fig.add_subplot(122)\n",
"for label in np.unique(y):\n",
" ax2.scatter(Xafter[y == label, 0],\n",
" Xafter[y == label, 1])\n",
"ax2.set_title('original pca')\n",
"ax2.set_xlabel(\"X_axis\")\n",
"ax2.set_ylabel(\"Y_axis\")\n",
"plt.show()\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
131 changes: 131 additions & 0 deletions trainee/takano/WorkOut_Report/Qiita3_ppca/Untitled.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"ename": "ModuleNotFoundError",
"evalue": "No module named 'matplotlib.pyplot'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-12-584a9621a47a>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mnumpy\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;31m#numpyをインストール\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 6\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mmatplotlib\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpyplot\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;31m#グラフを書くやつ\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 7\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mmpl_toolkits\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmplot3d\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mAxes3D\u001b[0m\u001b[1;31m#3次元plotを行うためのツール\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 8\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0msklearn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mpreprocessing\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mNormalizer\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib.pyplot'"
]
}
],
"source": [
"\n",
"\n",
"#PCA:主成分分析\n",
"# Code source: Gaël Varoquaux\n",
"# License: BSD 3 clause\n",
"\n",
"import numpy as np#numpyをインストール\n",
"import matplotlib.pyplot as plt#グラフを書くやつ\n",
"from mpl_toolkits.mplot3d import Axes3D#3次元plotを行うためのツール\n",
"from sklearn.preprocessing import Normalizer\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn import decomposition#PCAのライブラリ\n",
"from sklearn import datasets#datasetsを取ってくる\n",
"\n",
"\n",
"def Estep(W,X,N,sigma):\n",
" M=np.dot(W,W.T)+sigma*np.eye(N)\n",
" Minv=np.linalg.inv(W)\n",
" Ez_n=np.dot(Minv,W.T)\n",
" Ez_n=np.dot(Ez_n,X)\n",
" Ez_nz_nt=sigma*Minv+np.dot(Ez_n,Ez_n.T)\n",
" return Ez_n,Ez_nz_nt\n",
"def Mstep(Ez_n,Ez_nz_nt,X,N,D):\n",
" a=np.dot(X,Ez_n.T)\n",
" b=np.linalg.inv(sam(Ez_nz_nt))\n",
" Wnew=np.dot(sum(a),b)\n",
" nor = np.dot(X,X.T)\n",
" tenti = np.dot(Ez_n.T,Wnew.T)\n",
" W2 = np.dot(Wnew.T,Wnew)\n",
" hoge = np.dot(Ez_nz_nt,trace)\n",
" tr = hoge.trace()\n",
" sig = nor-2*np.dot(tenti,X)+tr\n",
" signew = sum(sig)/(N*D)\n",
" return Wnew,signew\n",
"n_component=2\n",
"np.set_printoptions(suppress=True)\n",
"#centers = [[1, 1], [-1, -1], [1, -1]]#3次元配列を作ってる\n",
"#print(centers)\n",
"fig = plt.figure(2, figsize=(14, 6))\n",
"iris = datasets.load_iris()\n",
"X = iris.data\n",
"y = iris.target\n",
"N=150\n",
"D=4\n",
"M=2\n",
"W =np.random.randn(4, 2)\n",
"sigma = np.array([[1]])\n",
"for i in range(4):#各データの平均を元のデータから引いている\n",
" mean = np.mean(X[:,i])\n",
" X[:,i]=(X[:,i]-mean)\n",
"for i in range(step):\n",
" Ez_n,Ez_nz_nt = Estep(W,X,N,sigma)\n",
" W,sigma = Mstep(Ez_n,Ez_nz_nt,X,N,D)\n",
" \n",
" \n",
"'''\n",
"X_cov=np.dot(X.T,X)#共分散行列を生成\n",
"w,v=np.linalg.eig(X_cov)#共分散行列の固有値、固有ベクトルを算出(固有値は既に大きさ順に並べられている)\n",
"for i in range(n_component):#固有値の大きい順に固有ベクトルを取り出す\n",
" Xpc[i]=v[:,i]\n",
"Xpc=np.array(Xpc)\n",
"'''\n",
"Xafter=np.dot(W,X.T)#取り出した固有ベクトルでデータを線形写像する\n",
"\n",
"\n",
"pca = decomposition.PCA(n_components=2)\n",
"pca.fit(X)\n",
"Xlib = pca.transform(X) \n",
"\n",
"\n",
"ax1 = fig.add_subplot(121)\n",
"for label in np.unique(y):\n",
" ax1.scatter(Xlib[y == label, 0],\n",
" Xlib[y == label, 1])\n",
"ax1.set_title('library pca')\n",
"ax1.set_xlabel(\"X_axis\")\n",
"ax1.set_ylabel(\"Y_axis\")\n",
"ax2 = fig.add_subplot(122)\n",
"for label in np.unique(y):\n",
" ax2.scatter(Xafter[y == label, 0],\n",
" Xafter[y == label, 1])\n",
"ax2.set_title('original pca')\n",
"ax2.set_xlabel(\"X_axis\")\n",
"ax2.set_ylabel(\"Y_axis\")\n",
"plt.show()\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}