Merge pull request #34 from artefactory/mixMNL
ADD:
    - BaseLatentClassModel
    - Specific LatentClass models for SimpleMNL & ConditionalMNL
    - Example Notebooks
    - Three datasets: heating, electricity, and train
VincentAuriau authored Mar 5, 2024
2 parents 6828b81 + 5cf51db commit f079757
Showing 21 changed files with 2,517 additions and 215 deletions.
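
For context before the file-by-file diff: a latent class (mixture) MNL averages class-specific MNL probabilities using estimated class weights. The NumPy sketch below only illustrates that mixture idea; it is not the API of the new `BaseLatentClassModel`, whose interface does not appear on this page, and all names in it are made up for the example.

```python
import numpy as np

def softmax(utilities):
    """Row-wise softmax turning utilities into choice probabilities."""
    exp_u = np.exp(utilities - utilities.max(axis=-1, keepdims=True))
    return exp_u / exp_u.sum(axis=-1, keepdims=True)

# Toy setup: 3 latent classes, 4 items, a single choice context.
rng = np.random.default_rng(0)
class_utilities = rng.normal(size=(3, 4))   # one utility row per latent class
class_weights = np.array([0.5, 0.3, 0.2])   # latent class shares, summing to 1

per_class_probs = softmax(class_utilities)        # shape (3, 4)
mixture_probs = class_weights @ per_class_probs   # shape (4,), the mixed MNL
print(mixture_probs, mixture_probs.sum())         # probabilities summing to 1.0
```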
11 changes: 9 additions & 2 deletions README.md
@@ -45,6 +45,10 @@ If you are new to choice modelling, you can check this [resource](https://www.pu
- Ready-To-Use datasets:
- [SwissMetro](./choice_learn/datasets/data/swissmetro.csv.gz) from Bierlaire et al. (2001) [[2]](#citation)
- [ModeCanada](./choice_learn/datasets/data/ModeCanada.csv.gz) from Koppelman et al. (1993) [[3]](#citation)
- The Train dataset from Ben-Akiva et al. (1993) [[5]](#citation)
- The Heating & Electricity datasets from Kenneth Train described [here](https://rdrr.io/cran/mlogit/man/Electricity.html) and [here](https://rdrr.io/cran/mlogit/man/Heating.html)
- The TaFeng dataset from [Kaggle](https://www.kaggle.com/datasets/chiranjivdas09/ta-feng-grocery-dataset)

### Models
- Ready-to-use models:
@@ -91,10 +95,12 @@ Choice-Learn requires the following:
- Python (>=3.8)
- NumPy (>=1.24)
- pandas (>=1.5)

For modelling you need:
- TensorFlow (>=2.13)
Finally, an optional requirement used for report and LBFG-s use is:
- tensorflow_probability (>=0.20.1)

Finally, an optional requirement used for reporting and L-BFGS optimization is:
- TensorFlow Probability (>=0.20.1)

## Usage
```python
@@ -151,6 +157,7 @@ A detailed documentation of this project is available [here](https://artefactory
[2][The Acceptance of Modal Innovation: The Case of Swissmetro](https://www.researchgate.net/publication/37456549_The_acceptance_of_modal_innovation_The_case_of_Swissmetro), Bierlaire, M.; Axhausen, K., W.; Abay, G. (2001)\
[3][Applications and Interpretation of Nested Logit Models of Intercity Mode Choice](https://trid.trb.org/view/385097), Forinash, C., V.; Koppelman, F., S. (1993)\
[4][The Demand for Local Telephone Service: A Fully Discrete Model of Residential Calling Patterns and Service Choices](https://www.jstor.org/stable/2555538), Train K., E.; McFadden, D., L.; Moshe, B. (1987)\
[5][Estimation of Travel Choice Models with Randomly Distributed Values of Time](https://ideas.repec.org/p/fth/lavaen/9303.html), Ben-Akiva, M.; Bolduc, D.; Bradley, M. (1993)

### Code and Repositories
- [1][RUMnet](https://github.com/antoinedesir/rumnet)
145 changes: 112 additions & 33 deletions choice_learn/data/choice_dataset.py
@@ -614,7 +614,7 @@ def __len__(self):
"""
return len(self.choices)

def get_num_items(self):
def get_n_items(self):
"""Method to access the total number of different items.
Returns:
@@ -624,7 +624,7 @@ def get_num_items(self):
"""
return self.base_num_items

def get_num_choices(self):
def get_n_choices(self):
"""Method to access the total number of different choices.
Redundant with __len__ method.
@@ -689,7 +689,7 @@ def _contexts_items_features_df_to_np(
sess_df.columns = sess_df.loc[items_id_column]
if features is not None:
contexts_items_features.append(sess_df[items_index].loc[features].T.values)
contexts_items_availabilities.append(np.ones(len(items_index)))
contexts_items_availabilities.append(np.ones(len(items_index)).astype("float32"))
else:
sess_feats = []
sess_av = []
@@ -806,9 +806,15 @@ def from_single_wide_df(
else:
contexts_items_availabilities = None

choices = df[choices_column]
choices = df[choices_column].to_numpy()
print("choice", choices)
if choice_mode == "items_id":
if items_id is None:
raise ValueError("items_id must be given to use choice_mode 'items_id'")
items_id = np.array(items_id)
choices = np.squeeze([np.where(items_id == c)[0] for c in choices])
if choices.shape[0] == 0:
raise ValueError("No choice found in the items_id list")

return ChoiceDataset(
fixed_items_features=fixed_items_features,
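
The new `choice_mode == "items_id"` branch above turns choices recorded as item identifiers into integer indexes before the `ChoiceDataset` is built. A standalone illustration of that mapping, with made-up item ids:

```python
import numpy as np
import pandas as pd

# Hypothetical wide-format choice column storing item ids rather than indexes.
choices = pd.Series(["hp", "gc", "gc", "ec"]).to_numpy()
items_id = np.array(["gc", "gr", "ec", "er", "hp"])

# Same mapping as in the hunk above: position of each chosen id within items_id.
indexed_choices = np.squeeze([np.where(items_id == c)[0] for c in choices])
print(indexed_choices)  # [4 0 0 2]
```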
@@ -940,7 +946,7 @@ def summary(self):
print("%=====================================================================%")
print("%%% Summary of the dataset:")
print("%=====================================================================%")
print("Number of items:", self.get_num_items())
print("Number of items:", self.get_n_items())
print(
"Number of choices:",
len(self),
@@ -1038,7 +1044,9 @@ def get_choices_batch(self, choices_indexes, features=None):
)

if self.contexts_items_availabilities is None:
contexts_items_availabilities = np.ones((len(choices_indexes), self.base_num_items))
contexts_items_availabilities = np.ones(
(len(choices_indexes), self.base_num_items)
).astype("float32")
else:
contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes]
# .astype(self._return_types[3])
@@ -1179,7 +1187,7 @@ def get_choices_batch(self, choices_indexes, features=None):
)

if self.contexts_items_availabilities is None:
contexts_items_availabilities = np.ones((self.base_num_items))
contexts_items_availabilities = np.ones((self.base_num_items)).astype("float32")
else:
contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes]

@@ -1299,41 +1307,67 @@ def __getitem__(self, choices_indexes):
elif isinstance(choices_indexes, slice):
return self.__getitem__(list(range(*choices_indexes.indices(len(self.choices)))))

-if self.fixed_items_features[0] is None:
-    fixed_items_features = None
-else:
+try:
+    if self.fixed_items_features[0] is None:
+        fixed_items_features = None
+    else:
+        fixed_items_features = self.fixed_items_features
+except TypeError:
    fixed_items_features = self.fixed_items_features
-if self.contexts_features[0] is None:
+
+try:
+    if self.contexts_features[0] is None:
+        contexts_features = None
+    else:
+        contexts_features = tuple(
+            self.contexts_features[i][choices_indexes]
+            for i in range(len(self.contexts_features))
+        )
+except TypeError:
    contexts_features = None
-else:
-    contexts_features = tuple(
-        self.contexts_features[i][choices_indexes]
-        for i in range(len(self.contexts_features))
-    )
-if self.contexts_items_features[0] is None:
+
+try:
+    if self.contexts_items_features[0] is None:
+        contexts_items_features = None
+    else:
+        contexts_items_features = tuple(
+            self.contexts_items_features[i][choices_indexes]
+            for i in range(len(self.contexts_items_features))
+        )
+except TypeError:
    contexts_items_features = None
-else:
-    contexts_items_features = tuple(
-        self.contexts_items_features[i][choices_indexes]
-        for i in range(len(self.contexts_items_features))
-    )
-if self.fixed_items_features_names[0] is None:
+
+try:
+    if self.fixed_items_features_names[0] is None:
+        fixed_items_features_names = None
+    else:
+        fixed_items_features_names = self.fixed_items_features_names
+except TypeError:
    fixed_items_features_names = None
-else:
-    fixed_items_features_names = self.fixed_items_features_names
-if self.contexts_features_names[0] is None:
+try:
+    if self.contexts_features_names[0] is None:
+        contexts_features_names = None
+    else:
+        contexts_features_names = self.contexts_features_names
+except TypeError:
    contexts_features_names = None
-else:
-    contexts_features_names = self.contexts_features_names
-if self.contexts_items_features_names[0] is None:
+try:
+    if self.contexts_items_features_names[0] is None:
+        contexts_items_features_names = None
+    else:
+        contexts_items_features_names = self.contexts_items_features_names
+except TypeError:
    contexts_items_features_names = None
-else:
-    contexts_items_features_names = self.contexts_items_features_names

+try:
+    contexts_items_availabilities = self.contexts_items_availabilities[choices_indexes]
+except TypeError:
+    contexts_items_availabilities = None
return ChoiceDataset(
    fixed_items_features=fixed_items_features,
    contexts_features=contexts_features,
    contexts_items_features=contexts_items_features,
-    contexts_items_availabilities=self.contexts_items_availabilities[choices_indexes],
+    contexts_items_availabilities=contexts_items_availabilities,
    choices=[self.choices[i] for i in choices_indexes],
    fixed_items_features_names=fixed_items_features_names,
    contexts_features_names=contexts_features_names,
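
The try/except rewrite above is what lets `__getitem__` cope with feature containers that are simply `None` (indexing `None` raises `TypeError`) instead of tuples whose first element is `None`. A hedged usage sketch, assuming `dataset` is an already-built `ChoiceDataset` with at least ten choices:

```python
# Hedged sketch: `dataset` is assumed to exist, possibly built without
# availabilities or without some of the feature groups.
subset = dataset[[0, 2, 5]]         # a list of choice indexes -> new ChoiceDataset
first_ten = dataset[0:10]           # slices are routed through the same __getitem__
print(len(subset), len(first_ten))  # 3 10
```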
@@ -1391,8 +1425,53 @@ def filter(self, bool_list):
Parameters
----------
bool_list : list of boolean
list of booleans of length self.get_num_sessions() to filter sessions.
list of booleans of length self.get_n_contexts() to filter contexts.
True to keep, False to discard.
"""
indexes = [i for i, keep in enumerate(bool_list) if keep]
return self[indexes]

def get_n_fixed_items_features(self):
"""Method to access the number of fixed items features.
Returns:
-------
int
number of fixed items features
"""
if self.fixed_items_features is not None:
n_features = 0
for fixed_features in self.fixed_items_features:
n_features += fixed_features.shape[1]
return n_features
return 0

def get_n_contexts_features(self):
"""Method to access the number of contexts features.
Returns:
-------
int
number of contexts features
"""
if self.contexts_features is not None:
n_features = 0
for context_features in self.contexts_features:
n_features += context_features.shape[1]
return n_features
return 0

def get_n_contexts_items_features(self):
"""Method to access the number of context items features.
Returns:
-------
int
number of contexts items features
"""
if self.contexts_items_features is not None:
n_features = 0
for contexts_items_features in self.contexts_items_features:
n_features += contexts_items_features.shape[2]
return n_features
return 0
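
Together with `filter`, the new `get_n_*` counters give a quick way to inspect a dataset after subsetting. Another hedged sketch, again assuming an existing `ChoiceDataset` named `dataset`:

```python
# Keep only the first half of the choices, then count features on the subset.
keep = [i < len(dataset) // 2 for i in range(len(dataset))]
half = dataset.filter(keep)

print(half.get_n_items())                    # number of alternatives is unchanged
print(half.get_n_fixed_items_features())     # 0 when no fixed items features are stored
print(half.get_n_contexts_features())        # summed over the contexts feature groups
print(half.get_n_contexts_items_features())  # summed over the contexts items feature groups
```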
6 changes: 4 additions & 2 deletions choice_learn/data/indexer.py
@@ -295,7 +295,7 @@ def __getitem__(self, choices_indexes):
if self.choice_dataset.contexts_items_availabilities is None:
contexts_items_availabilities = np.ones(
(len(choices_indexes), self.choice_dataset.base_num_items)
)
).astype("float32")
else:
if hasattr(self.choice_dataset.contexts_items_availabilities, "batch"):
contexts_items_availabilities = (
@@ -440,7 +440,9 @@ def __getitem__(self, choices_indexes):
choice = self.choice_dataset.choices[choices_indexes]

if self.choice_dataset.contexts_items_availabilities is None:
contexts_items_availabilities = np.ones((self.choice_dataset.base_num_items))
contexts_items_availabilities = np.ones(
(self.choice_dataset.base_num_items)
).astype("float32")
else:
contexts_items_availabilities = self.choice_dataset.contexts_items_availabilities[
choices_indexes
7 changes: 2 additions & 5 deletions choice_learn/datasets/__init__.py
@@ -1,8 +1,5 @@
"""Init file for datasets module."""

from .base import load_modecanada, load_swissmetro
from .base import load_electricity, load_heating, load_modecanada, load_swissmetro

__all__ = [
"load_modecanada",
"load_swissmetro",
]
__all__ = ["load_modecanada", "load_swissmetro", "load_electricity", "load_heating"]
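
With this change the two new loaders are importable directly from `choice_learn.datasets`. A minimal sketch, assuming the loaders return ready-to-use `ChoiceDataset` objects by default (their signatures are not shown in this diff):

```python
from choice_learn.datasets import load_electricity, load_heating

electricity = load_electricity()  # assumed default: a ChoiceDataset built from the packaged CSV
heating = load_heating()

electricity.summary()
print(heating.get_n_items(), heating.get_n_choices())
```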