ADD: NumPy & TensorFlow citations
VincentAuriau committed Mar 20, 2024
1 parent 40a5d65 commit cb13c49
Showing 3 changed files with 119 additions and 86 deletions.
73 changes: 73 additions & 0 deletions docs/paper/paper.bib
@@ -30,3 +30,76 @@ @misc{Du:2023
archivePrefix={arXiv},
primaryClass={cs.LG}
}

@misc{Aouad:2023,
title={Representing Random Utility Choice Models with Neural Networks},
author={Ali Aouad and Antoine Désir},
year={2023},
eprint={2207.12877},
archivePrefix={arXiv},
primaryClass={cs.LG}
}

@misc{Han:2022,
title={A Neural-embedded Choice Model: TasteNet-MNL Modeling Taste Heterogeneity with Flexibility and Interpretability},
author={Yafei Han and Francisco Camara Pereira and Moshe Ben-Akiva and Christopher Zegras},
year={2022},
eprint={2002.00922},
archivePrefix={arXiv},
primaryClass={econ.EM}
}

@misc{Salvadé:2024,
title={RUMBoost: Gradient Boosted Random Utility Models},
author={Nicolas Salvadé and Tim Hillel},
year={2024},
eprint={2401.11954},
archivePrefix={arXiv},
primaryClass={cs.LG}
}

@article{Train:1987,
ISSN = {07416261},
URL = {http://www.jstor.org/stable/2555538},
abstract = {We present an empirical model of households' choices among local telephone service options (for example, between flat-rate and measured service) and the interrelation of these choices with the number and average duration of local calls households make at each time of day to each geographical zone. Using a nested logit model with estimation performed on a randomly selected subset of the households' calling patterns, we calculate elasticities of demand for each local service option, number of calls, average duration, and revenues with respect to the fixed monthly charges and the usage charges for calling under each option. We find moderate price elasticities of number of calls with respect to usage charges for households subscribing to measured service. Nevertheless, raising usage charges has a negligible effect on revenues, since a sufficient number of households either originally subscribe to flat-rate service or convert to flat-rate service in response to higher usage charges. We find a high elasticity of demand for each service option with respect to its fixed monthly fee. This indicates high substitutability among service options. The shift among service options induces new calling patterns, which we find to be a small but not negligible indirect effect.},
author = {Kenneth E. Train and Daniel L. McFadden and Moshe Ben-Akiva},
journal = {The RAND Journal of Economics},
number = {1},
pages = {109--123},
publisher = {[RAND Corporation, Wiley]},
title = {The Demand for Local Telephone Service: A Fully Discrete Model of Residential Calling Patterns and Service Choices},
urldate = {2024-03-19},
volume = {18},
year = {1987}
}

@Article{Harris:2020,
title = {Array programming with {NumPy}},
author = {Charles R. Harris and K. Jarrod Millman and St{\'{e}}fan J.
van der Walt and Ralf Gommers and Pauli Virtanen and David
Cournapeau and Eric Wieser and Julian Taylor and Sebastian
Berg and Nathaniel J. Smith and Robert Kern and Matti Picus
and Stephan Hoyer and Marten H. van Kerkwijk and Matthew
Brett and Allan Haldane and Jaime Fern{\'{a}}ndez del
R{\'{i}}o and Mark Wiebe and Pearu Peterson and Pierre
G{\'{e}}rard-Marchant and Kevin Sheppard and Tyler Reddy and
Warren Weckesser and Hameer Abbasi and Christoph Gohlke and
Travis E. Oliphant},
year = {2020},
journal = {Nature},
volume = {585},
number = {7825},
pages = {357--362},
doi = {10.1038/s41586-020-2649-2},
publisher = {Springer Science and Business Media {LLC}},
url = {https://doi.org/10.1038/s41586-020-2649-2}
}

@software{Abadi:2015,
author = {Abadi, Martín and Agarwal, Ashish and Barham, Paul and Brevdo, Eugene and Chen, Zhifeng and Citro, Craig and Corrado, Greg S. and Davis, Andy and Dean, Jeffrey and Devin, Matthieu and Ghemawat, Sanjay and Goodfellow, Ian and Harp, Andrew and Irving, Geoffrey and Isard, Michael and Jozefowicz, Rafal and Jia, Yangqing and Kaiser, Lukasz and Kudlur, Manjunath and Levenberg, Josh and Mané, Dan and Schuster, Mike and Monga, Rajat and Moore, Sherry and Murray, Derek and Olah, Chris and Shlens, Jonathon and Steiner, Benoit and Sutskever, Ilya and Talwar, Kunal and Tucker, Paul and Vanhoucke, Vincent and Vasudevan, Vijay and Viégas, Fernanda and Vinyals, Oriol and Warden, Pete and Wattenberg, Martin and Wicke, Martin and Yu, Yuan and Zheng, Xiaoqiang},
doi = {10.5281/zenodo.4724125},
license = {Apache-2.0},
month = nov,
title = {{TensorFlow, Large-scale machine learning on heterogeneous systems}},
year = {2015}
}
52 changes: 46 additions & 6 deletions docs/paper/paper.md
@@ -1,20 +1,21 @@
---
title: 'Choice-Learn: A Python package for generic choice modelling with large datasets.'
# Idea to introduce: ML&Classical, toolbox
# A Python Toolbox for generic and custom choice modelling ?
tags:
- Python
- choice
- decision
authors:
- name: Vincent Auriau
corresponding: true # (This is how to denote the corresponding author)
orcid: 0000-0000-0000-0000
equal-contrib: true
affiliation: "1, 2" # (Multiple affiliations must be quoted)
- name: Author Without ORCID
equal-contrib: true # (This is how you can denote equal contributions between multiple authors)
affiliation: 2
- name: Author with no affiliation
corresponding: true # (This is how to denote the corresponding author)
affiliation: 3
- given-names: Ludwig
dropping-particle: van
@@ -40,22 +41,61 @@ aas-journal: Astrophysical Journal <- The name of the AAS journal.

Discrete choice models aim at explaining or predicting a choice from a set of alternatives. Well-known use-cases include analyzing people's choice of transport mode or product purchases in stores. One key feature of choice models is their ability to handle sets of variable size, with some alternatives possibly being unavailable. Choice models can be used to estimate interpretable values such as a consumer's price elasticity. Once estimated, they can also be used in a second processing step, such as assortment optimization or pricing. Recent breakthroughs in the Machine-Learning community have called for the use of more complex models and larger datasets in the estimation of choice models.

`Choice-Learn` aims at providing useful tools for academic research as well as practitioners.(# Add information here) In particular, the package focuses on three main points to extend choice modelling tools:
- Providing "classical" and "Machine-Learning-based" literature choice models within the same codebase
- Making it possible to work with very large datasets through RAM optimization and batching processes
- Offering an easy-to-use interface to build custom choice models

# Statement of need

### Small introduction on choice modelling

With the fast-paced improvement of companies' data architectures, larger and more reliable datasets emerge. Choice modelling is a natural tool for a retailer to understand its customer base and to improve or optimize its commercial offer. The large datasets now available open the door to more complex machine learning models that would otherwise be difficult to estimate with little data. While several efficient Python packages have been made available to estimate choice models [@Bierlaire:2023; @Brathwaite:2018], they are usually not built to work with large-scale datasets.

With these large datasets comes the possibility to use more complex models. Recent publications outline this possibility with neural-network approaches [@Han:2022; @Aouad:2023] or tree-based boosting models [@Salvadé:2024]. The existing libraries [@Bierlaire:2023; @Brathwaite:2018; @Du:2023] are usually not built to allow for model customization, forcing users to rely on different packages, which can make comparisons difficult.

Choice-Learn is organized around three pillars that can be used independently or together.
- Dataset handling: a NumPy-based [@Harris:2020] method to create batches of data is proposed. By limiting data replication, it optimizes memory usage.
- Choice models: Choice-Learn proposes ready-to-use models with a Python interface, such as the Conditional-MNL [@Train:1987] or RUMnet [@Aouad:2023]. Based on TensorFlow [@Abadi:2015], the implementation ensures efficient estimation with the different available optimization algorithms and offers GPU compatibility. Choice-Learn also aims at helping users build new, custom choice models through a common inheritance scheme that minimizes the user's work. Compared to usual implementations, non-linear formulations of utility are possible.
- Tools: choice models can be used for use-cases such as assortment optimization. The models' common signature makes it easy to use their outputs in such tools, and implementations are also proposed.
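The memory idea behind the first pillar can be sketched in plain NumPy (an illustrative toy with hypothetical names such as `get_batch`, not the package's actual internals): features that are constant across choices are stored once and gathered by index when a batch is assembled, instead of being replicated for every recorded choice.

```python
import numpy as np

n_items, n_features, n_choices = 3, 4, 100_000

# Stored once: one feature row per item (3 x 4 float64 values).
items_features = np.arange(n_items * n_features, dtype=np.float64).reshape(n_items, n_features)
# Stored per choice: only the index of the chosen item.
choices = np.random.default_rng(0).integers(0, n_items, size=n_choices)

def get_batch(batch_indices):
    """Assemble a batch on the fly: the item features are shared, so no
    (n_choices, n_items, n_features) array is ever materialized in memory."""
    return items_features, choices[batch_indices]

batch_features, batch_choices = get_batch(np.arange(128))

# Replicating the item features for every recorded choice would instead cost:
replicated_bytes = n_choices * n_items * n_features * 8
```

Under this toy setup, the shared storage costs 96 bytes for the item features regardless of the number of choices, while full replication would grow linearly with the dataset size.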

> Revisit the three pillars + (TensorFlow, production?) + ()

# Batching and RAM usage

Choice models estimate a utility function from which a probability to choose each alternative is derived.
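For instance, in the multinomial logit family, the probability of choosing an alternative is the softmax of the estimated utilities restricted to the available alternatives. A minimal NumPy sketch (illustrative only; `choice_probabilities` is a hypothetical helper, not part of Choice-Learn):

```python
import numpy as np

def choice_probabilities(utilities, availabilities):
    """Softmax over available alternatives only.

    utilities: (n_sessions, n_items) estimated utilities
    availabilities: (n_sessions, n_items) 0/1 mask of available items
    """
    # exp(U) for available items, zero for unavailable ones
    exp_u = np.exp(utilities) * availabilities
    # Normalize each session so that available probabilities sum to 1
    return exp_u / exp_u.sum(axis=1, keepdims=True)

utilities = np.array([[1.0, 0.5, -0.2]])
availabilities = np.array([[1.0, 1.0, 0.0]])  # third alternative unavailable
probas = choice_probabilities(utilities, availabilities)
```

Here the unavailable third alternative receives probability zero, and the two available alternatives share the remaining mass according to their utilities.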

# Choice model customization

Inheriting from the ChoiceModel class lets users define their own choice model. One only needs to specify how the utility of a batch of data is computed, using TensorFlow operations. Here is an example.
### Check example > What would be a great example ?

```python
import tensorflow as tf
from tensorflow.keras.layers import Dense

from choice_learn.models import ChoiceModel


class ExampleCustomizedModel(ChoiceModel):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # First, non-linear layer
        self.dense_1 = Dense(units=10, activation="elu")
        # Second, linear layer
        self.dense_2 = Dense(units=1, activation="linear")
        # Do not forget to specify self.weights with all the coefficients
        # that need to be estimated: easy with TensorFlow layers.
        self.weights = self.dense_1.trainable_variables + self.dense_2.trainable_variables

    def compute_batch_utility(self,
                              fixed_items_features,
                              contexts_features,
                              contexts_items_features,
                              contexts_items_availabilities,
                              choices):
        # Apply the neural network to contexts_items_features for each item,
        # then concatenate the per-item utilities of shape (n_sessions, 1)
        # into a single tensor of shape (n_sessions, n_items).
        u = tf.concat([self.dense_2(self.dense_1(contexts_items_features[0][:, i]))
                       for i in range(contexts_items_features[0].shape[1])], axis=1)
        return u
```
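The shape logic of such a utility computation can be checked with a plain-NumPy mock of the two layers (random stand-in weights for illustration, not an estimated model):

```python
import numpy as np

rng = np.random.default_rng(0)
n_sessions, n_items, n_features = 5, 3, 4

contexts_items_features = rng.normal(size=(n_sessions, n_items, n_features))
# Random stand-ins for the two Dense layers: (4 -> 10), then (10 -> 1)
w1, w2 = rng.normal(size=(n_features, 10)), rng.normal(size=(10, 1))
elu = lambda x: np.where(x > 0, x, np.exp(x) - 1)

# Per-item utilities of shape (n_sessions, 1), concatenated along axis 1
# into a (n_sessions, n_items) utility matrix.
u = np.concatenate(
    [elu(contexts_items_features[:, i] @ w1) @ w2 for i in range(n_items)],
    axis=1,
)
```

The resulting `u` has one utility per session and item, which is exactly the shape `compute_batch_utility` is expected to return.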

# Acknowledgements

# References
80 changes: 0 additions & 80 deletions notebooks/rumnet_example.ipynb
@@ -49,100 +49,20 @@
"First, we download the SwissMetro dataset:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = load_swissmetro(as_frame=True)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"We follow the same data preparation as in the original paper in order to get the exact same results.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df = df.loc[df.CHOICE!=0]\n",
"choices = df.CHOICE.to_numpy() - 1\n",
"contexts_items_availabilities = df[[\"TRAIN_AV\", \"SM_AV\", \"CAR_AV\"]].to_numpy()\n",
"contexts_items_features = np.stack([df[[\"TRAIN_TT\", \"TRAIN_CO\", \"TRAIN_HE\"]].to_numpy(),\n",
" df[[\"SM_TT\", \"SM_CO\", \"SM_HE\"]].to_numpy(),\n",
" df[[\"CAR_TT\", \"CAR_CO\", \"CAR_HE\"]].to_numpy()], axis=1)\n",
"# contexts_features = df[[\"GROUP\", \"PURPOSE\", \"FIRST\", \"TICKET\", \"WHO\", \"LUGGAGE\", \"AGE\", \"MALE\",\n",
"# \"INCOME\", \"GA\", \"ORIGIN\", \"DEST\"]].to_numpy()\n",
"fixed_items_features = np.eye(3)\n",
"\n",
"contexts_items_features[:, :, 0] = contexts_items_features[:, :, 0] / 1000\n",
"contexts_items_features[:, :, 1] = contexts_items_features[:, :, 1] / 5000\n",
"contexts_items_features[:, :, 2] = contexts_items_features[:, :, 2] / 100\n",
"\n",
"long_data = pd.get_dummies(df,\n",
" columns=[\"GROUP\", \"PURPOSE\", \"FIRST\", \"TICKET\", \"WHO\",\n",
" \"LUGGAGE\", \"AGE\", \"MALE\",\n",
" \"INCOME\", \"GA\", \"ORIGIN\", \"DEST\"],\n",
" drop_first=False)\n",
"\n",
"# Transforming the category data into one-hot columns\n",
"prefixes = (\"GROUP\", \"PURPOSE\", \"FIRST\", \"TICKET\", \"WHO\",\n",
"            \"LUGGAGE\", \"AGE\", \"MALE\", \"INCOME\", \"GA\", \"ORIGIN\", \"DEST\")\n",
"contexts_features = [col for col in long_data.columns if col.startswith(prefixes)]\n",
"\n",
"contexts_features = long_data[contexts_features].to_numpy()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now, we can create our ChoiceDataset from the dataframe."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = ChoiceDataset(fixed_items_features=(fixed_items_features.astype(\"float32\"), ),\n",
" contexts_features=(contexts_features.astype(\"float32\"), ),\n",
" contexts_items_features=(contexts_items_features.astype(\"float32\"), ),\n",
" contexts_items_availabilities=contexts_items_availabilities,\n",
" choices=choices)"
]
},
{
"cell_type": "code",
"execution_count": null,
