Added simulation example.

ISM returns now a dictionary. ISM's input is now a list of 2D arrays. Completed README with simulation example.
Advestis · Apr 9, 2024 · 381ece8 · 381ece8
1 parent 9dc2484
commit 381ece8
Show file tree

Hide file tree

Showing 6 changed files with 160 additions and 126 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,3 @@
 __pycache__/
 example/.ipynb_checkpoints/
+adilsm.egg-info/
diff --git a/adilsm/adilsm.py b/adilsm/adilsm.py
@@ -41,7 +41,7 @@ def generate_h4_sparse(h4, q4_ism, n_items, n_comp, n_scores, sparsity_coeff=.8)
     return h4_sparse, hhii
 
 def integrate_scores(m0_nan_0, m0_weight, h4_sparse, w4_ism, h4_ism, q4_ism, n_scores, n_items, n_themes, update_h4_ism=False,
-                     max_iter_mult=200, sparsity_coeff=.8):
+                     max_iter_mult=200, fast_mult_rules=True, sparsity_coeff=.8):
 
     EPSILON = np.finfo(np.float32).eps
 
@@ -60,18 +60,14 @@ def integrate_scores(m0_nan_0, m0_weight, h4_sparse, w4_ism, h4_ism, q4_ism, n_s
         # Apply multiplicative updates to preserve h sparsity   
         for _ in range(0, max_iter_mult):
             # Weighted multiplicative rules
-            # m0_score_est = (m0_score / (w4_score @ h4_score.T+EPSILON))*m0_weight_score
-            # h4_score *= (w4_score.T @ m0_score_est).T
-            # w4_score *= m0_score_est @ h4_score 
-
-            m0_score_est = (w4_score @ h4_score.T)*m0_weight_score
-            h4_score *= ((w4_score.T @ m0_score) / (w4_score.T @ m0_score_est + EPSILON)).T
-            w4_score *= (m0_score @ h4_score / (m0_score_est @ h4_score + EPSILON))
-
-            # h4_score *= ((w4_score.T @ m0_score) / (w4_score.T @ ((w4_score @ h4_score.T)*m0_weight_score) + EPSILON)).T
-            # w4_score *= (m0_score @ h4_score / ((m0_weight_score*(w4_score @ h4_score.T)) @ h4_score + EPSILON))
-
-        # print(h4_score)
+            if fast_mult_rules:
+                m0_score_est = (w4_score @ h4_score.T)*m0_weight_score
+                h4_score *= ((w4_score.T @ m0_score) / (w4_score.T @ m0_score_est + EPSILON)).T
+                w4_score *= (m0_score @ h4_score / (m0_score_est @ h4_score + EPSILON))
+            else:
+                h4_score *= ((w4_score.T @ m0_score) / (w4_score.T @ ((w4_score @ h4_score.T)*m0_weight_score) + EPSILON)).T
+                w4_score *= (m0_score @ h4_score / ((m0_weight_score*(w4_score @ h4_score.T)) @ h4_score + EPSILON))
+
         # Normalize w4_score by max column and update h4_score
         max_values = np.max(w4_score, axis=0)
         # Replace maximum values equal to 0 with 1
@@ -104,13 +100,13 @@ def integrate_scores(m0_nan_0, m0_weight, h4_sparse, w4_ism, h4_ism, q4_ism, n_s
 
     return h4_updated, h4_updated_sparse, hhii_updated, w4_ism, h4_ism, q4_ism, tensor_score
 
-def ism(m0, n_embedding, n_themes, n_scores, n_items, norm_m0 = True, max_iter=200, tol=1.e-6, verbose=-1, random_state=0, 
-        max_iter_integrate=20, max_iter_mult=200, update_h4_ism=False, sparsity_coeff=.8):
+def ism(m0:np.array, n_embedding:int, n_themes:int, n_scores:int, n_items:list[int], norm_m0:bool = True, max_iter:int=200, tol:float=1.e-6, verbose:int=-1, random_state:int=0, 
+        max_iter_integrate:int=20, max_iter_mult:int=200, fast_mult_rules:bool=True, update_h4_ism:bool=False, sparsity_coeff:float=.8):
     """Estimate ISM model
 
     Parameters
     ----------
-    m0: float
+    m0: NDArray
         Matrix of views, concatenated horizontally.
     n_embedding: integer
         Dimension of the embedding space.
@@ -137,6 +133,8 @@ def ism(m0, n_embedding, n_themes, n_scores, n_items, norm_m0 = True, max_iter=2
         Max number of iterations during the straightening process.
     max_iter_mult: integer, default: 200
         Max number of iterations of NMF multiplicative updates during the embedding process.
+    fast_mult_rules: boolean, default True
+        Use common matrix estimate in w and h updates
     update_h4_ism: boolean, default False
         Update or not the NTF factoring matrix H*.
     sparsity_coeff:
@@ -190,7 +188,7 @@ def ism(m0, n_embedding, n_themes, n_scores, n_items, norm_m0 = True, max_iter=2
     # Embed using scores w4 found in preliminary NMF and initialize themes through NTF       
     h4_updated, h4_updated_sparse, hhii_updated, w4_ism, h4_ism, q4_ism, tensor_score = \
         integrate_scores(m0_nan_0, m0_weight, h4_sparse, w4, None, None, n_scores, n_items, n_themes, update_h4_ism=True,
-                         max_iter_mult=max_iter_mult, sparsity_coeff=sparsity_coeff)
+                         max_iter_mult=max_iter_mult, fast_mult_rules=True, sparsity_coeff=sparsity_coeff)
 
     error = np.linalg.norm(m0 -  w4_ism @ h4_updated_sparse.T) / np.linalg.norm(m0)
     # print('error ism before straightening: ',round(error, 2))
@@ -204,11 +202,11 @@ def ism(m0, n_embedding, n_themes, n_scores, n_items, norm_m0 = True, max_iter=2
         if iter_integrate == 0:               
             h4_updated, h4_updated_sparse, hhii_updated, w4_ism, h4_ism, q4_ism, tensor_score = \
                 integrate_scores(m0_nan_0, m0_weight, h4_updated_sparse, w4_ism, np.identity(n_themes), q4_ism, n_scores, n_items, n_themes, update_h4_ism=update_h4_ism,
-                                 max_iter_mult=max_iter_mult, sparsity_coeff=sparsity_coeff)      
+                                 max_iter_mult=max_iter_mult, fast_mult_rules=True, sparsity_coeff=sparsity_coeff)      
         else:
             h4_updated, h4_updated_sparse, hhii_updated, w4_ism, h4_ism, q4_ism, tensor_score = \
                 integrate_scores(m0_nan_0, m0_weight, h4_updated_sparse, w4_ism, h4_ism, q4_ism, n_scores, n_items, n_themes, update_h4_ism=update_h4_ism,
-                                 max_iter_mult=max_iter_mult, sparsity_coeff=sparsity_coeff)    
+                                 max_iter_mult=max_iter_mult, fast_mult_rules=True, sparsity_coeff=sparsity_coeff)    
 
         if (hhii_updated == hhii_updated_0).all():
             flag+=1
@@ -343,7 +341,7 @@ def ism_expand(m0, h4_sparse, h4_ism, q4_ism, n_themes, n_scores, n_items, max_i
             tensor_score[missing_rows, i1:i2] *= np.where(q4_ism[i_score, :] > 0, 1, 0)
 
     # Apply NTF with prescribed number of themes and update themes
-    my_ntfmodel = NTF(n_components=n_themes, leverage=leverage, init_type=2, max_iter=max_iter, tol=tol, verbose=verbose, random_state=random_state)
+    my_ntfmodel = NTF(n_components=n_themes, leverage=None, init_type=2, max_iter=max_iter, tol=tol, verbose=verbose, random_state=random_state)
     estimator_ = my_ntfmodel.fit_transform(tensor_score, h=h4_ism, q=q4_ism, update_h=False, update_q=True, n_blocks=n_scores)
     w4_ism = estimator_.w
 

diff --git a/examples/abis_biomed.ipynb b/examples/abis_biomed.ipynb
diff --git a/examples/simulation_biomed.ipynb b/examples/simulation_biomed.ipynb
@@ -0,0 +1,87 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Relative error:  0.08\n"
+     ]
+    }
+   ],
+   "source": [
+    "import adilsm.adilsm as ilsm\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "max_noise_level = 0.01\n",
+    "# Generate a random non-negative matrix with 100 rows and 10 columns\n",
+    "A = np.random.rand(100, 10)\n",
+    "# Swap the columns of the A and add some noise to generate B\n",
+    "B = np.random.permutation(A.T).T + np.random.uniform(low=0, high=max_noise_level, size=A.shape)\n",
+    "# Add noise to A\n",
+    "A += np.random.uniform(low=0, high=max_noise_level, size=A.shape)\n",
+    "\n",
+    "#  ISM is expected to recognize that A and B convey the same information up to some noise, albeit with the columns of B swapped around.\n",
+    "\n",
+    "m0 = np.hstack((A, B))\n",
+    "\n",
+    "n_items = [A.shape[1], B.shape[1]]\n",
+    "n_scores = len(n_items)\n",
+    "n_embedding, n_themes = [10,10]\n",
+    "\n",
+    "h4_updated, h4_updated_sparse, w4_ism, h4_ism, q4_ism, tensor_score, m0_norm = ilsm.ism(m0, n_embedding, n_themes, n_scores, n_items, norm_m0=False, update_h4_ism=True,\n",
+    "                                                                        max_iter_mult=200,  sparsity_coeff=.8)\n",
+    "error = np.linalg.norm(m0 -  w4_ism @ h4_updated_sparse.T) / np.linalg.norm(m0)\n",
+    "print('Relative error: ',round(error, 2))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.0"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/uci_digits_biomed.ipynb b/examples/uci_digits_biomed.ipynb
diff --git a/setup.py b/setup.py
@@ -7,7 +7,7 @@
 
 setup(
     name="adilsm",
-    version='0.0.7',
+    version='0.0.8',
     author="Mazars",
     author_email="[email protected]",
     description=f"{description}",