ENH: support elimination of groups of products for second choice data
jeffgortmaker committed Feb 25, 2023
1 parent bee963a commit 15010d1
Showing 11 changed files with 255 additions and 94 deletions.
4 changes: 3 additions & 1 deletion docs/background.rst
@@ -295,7 +295,9 @@ Micro moment parts based on second choice are averages over values :math:`v_{pijkt}`

Its simulated analogue is

.. math:: v_p = \frac{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt} v_{pijkt}}{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt}}.
.. math:: v_p = \frac{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt} v_{pijkt}}{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt}},

in which :math:`s_{ik(-j)t}` is the probability of choosing :math:`k` when :math:`j` is removed from the choice set. One can also define micro moment parts based on second choices where a group of products :math:`h(j)` containing the first choice :math:`j` is removed from the choice set. In this case, the above second choice probabilities become :math:`s_{ik(-h(j))t}`.

Covariances are defined analogously.
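
As a concrete illustration (not part of the commit), here is a minimal NumPy sketch of second-choice probabilities :math:`s_{ik(-h(j))t}` under simple logit for a single market, where the whole group containing the first choice is removed from the choice set; all names are illustrative.

import numpy as np

def second_choice_probabilities(utilities, j, group_ids):
    """Logit second-choice probabilities after removing the group containing
    the first choice j; the outside good's utility is normalized to zero.
    """
    exp_utilities = np.exp(utilities)
    # zero out every product in the same group as the first choice
    exp_utilities[group_ids == group_ids[j]] = 0
    # re-normalize over the outside good and the remaining products
    return exp_utilities / (1 + exp_utilities.sum())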

10 changes: 10 additions & 0 deletions pyblp/economies/economy.py
@@ -244,6 +244,16 @@ def _validate_name(self, name: Optional[str], none_valid: bool = True) -> None:
if name not in names:
raise NameError(f"'{name}' is not None or one of the underlying variables, {list(sorted(names))}.")

def _validate_product_ids_index(self, product_ids_index: int) -> None:
"""Validate that a product IDs index is valid."""
if not isinstance(product_ids_index, int) or product_ids_index < 0:
raise ValueError("The product IDs index must be a non-negative int.")
if self.products.product_ids.size == 0:
raise ValueError("Since the product IDs index is not None, product_data must have product_ids.")
max_index = self.products.product_ids.shape[1] - 1
if not 0 <= product_ids_index <= max_index:
raise ValueError(f"The product IDs index should be at most {max_index}.")

def _coerce_optional_firm_ids(self, firm_ids: Optional[Any], market_ids: Optional[Array] = None) -> Array:
"""Coerce optional array-like firm IDs into a column vector and validate it. By default, assume that firm IDs
are for all markets.
3 changes: 2 additions & 1 deletion pyblp/economies/problem.py
@@ -1279,7 +1279,8 @@ class Problem(ProblemEconomy):
It may be convenient to define IDs for different products:
- **product_ids** (`object, optional`) - IDs that identify products within markets.
- **product_ids** (`object, optional`) - IDs that identify products within markets. There can be multiple
columns.
Finally, clustering groups can be specified to account for within-group correlation while updating the weighting
matrix and estimating standard errors:
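A hedged sketch (not from the commit) of how a multi-column ``product_ids`` field might be built, with invented values; the second column groups products, as in the updated test fixtures below.

import numpy as np

product_ids = np.arange(6)
product_data = {
    'market_ids': np.zeros(6, dtype=int),
    'firm_ids': np.array([0, 0, 1, 1, 2, 2]),
    # first column: unique product IDs; second column: product groups
    'product_ids': np.c_[product_ids, product_ids % 2],
    # ... prices, shares, characteristics, and instruments as usual ...
}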
3 changes: 2 additions & 1 deletion pyblp/economies/simulation.py
@@ -106,7 +106,8 @@ class Simulation(Economy):
It may be convenient to define IDs for different products:
- **product_ids** (`object, optional`) - IDs that identify products within markets.
- **product_ids** (`object, optional`) - IDs that identify products within markets. There can be multiple
columns.
To simulate a nested logit or random coefficients nested logit (RCNL) model, nesting groups must be specified:
6 changes: 3 additions & 3 deletions pyblp/markets/economy_results_market.py
@@ -353,8 +353,8 @@ def safely_compute_profits(

@NumericalErrorHandler(exceptions.PostEstimationNumericalError)
def safely_compute_consumer_surplus(
self, keep_all: bool, eliminate_product_ids: Optional[Any], prices: Optional[Array]) -> (
Tuple[Array, List[Error]]):
self, keep_all: bool, eliminate_product_ids: Optional[Any], product_ids_index: int,
prices: Optional[Array]) -> Tuple[Array, List[Error]]:
"""Estimate population-normalized consumer surplus or keep all individual-level surpluses. By default, use
unchanged prices, handling any numerical errors.
"""
@@ -379,7 +379,7 @@ def safely_compute_consumer_surplus(

# eliminate any products from the choice set
if eliminate_product_ids is not None:
for j, product_id in enumerate(self.products.product_ids):
for j, product_id in enumerate(self.products.product_ids[:, product_ids_index]):
if product_id in eliminate_product_ids:
exp_utilities[j] = 0

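A standalone sketch of the elimination step above, with invented values for one market; under logit, consumer surplus is the log-sum of the surviving exponentiated utilities (up to division by the price coefficient).

import numpy as np

utilities = np.array([1.0, 0.5, -0.2, 0.3])  # illustrative mean utilities
product_ids = np.c_[np.arange(4), np.arange(4) % 2]  # two ID columns
eliminate_product_ids = {1, 3}
product_ids_index = 0

exp_utilities = np.exp(utilities)
# zero out products whose ID in the selected column is eliminated
mask = np.isin(product_ids[:, product_ids_index], list(eliminate_product_ids))
exp_utilities[mask] = 0
surplus = np.log1p(exp_utilities.sum())  # log(1 + sum of remaining terms)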
185 changes: 125 additions & 60 deletions pyblp/markets/market.py

Large diffs are not rendered by default.

20 changes: 19 additions & 1 deletion pyblp/micro.py
@@ -80,6 +80,12 @@ class MicroDataset(StringRepresentation):
issue, consider setting ``pyblp.options.micro_computation_chunks`` to a value higher than its default of
``1``, such as the highest :math:`J_t`. This will cut down on memory usage without much affecting speed.
eliminated_product_ids_index : `int, optional`
This option determines whether the dataset's second choices are computed after eliminating only the first choice
product :math:`j` from the choice set, in which case this should be ``None``, the default, or after eliminating a
group of products that includes the first choice, in which case this should be a number between ``0`` and one less
than the number of columns in the ``product_ids`` field of ``product_data``, inclusive. The corresponding column of
``product_ids`` determines the groups.
market_ids : `array-like, optional`
Distinct market IDs with nonzero survey weights :math:`w_{dijt}`. For other markets, :math:`w_{dijt} = 0`, and
``compute_weights`` will not be called.
@@ -94,9 +100,11 @@ class MicroDataset(StringRepresentation):
observations: int
compute_weights: functools.partial
market_ids: Optional[Set]
eliminated_product_ids_index: Optional[int]

def __init__(
self, name: str, observations: int, compute_weights: Callable,
eliminated_product_ids_index: Optional[int] = None,
market_ids: Optional[Union[Sequence, Array]] = None) -> None:
"""Validate information to the greatest extent possible without an economy or calling the function."""
if not isinstance(name, str):
@@ -110,6 +118,12 @@ def __init__(
self.observations = observations
self.compute_weights = functools.partial(compute_weights)

# validate the product IDs index
self.eliminated_product_ids_index = eliminated_product_ids_index
if eliminated_product_ids_index is not None:
if not isinstance(eliminated_product_ids_index, int) or eliminated_product_ids_index < 0:
raise ValueError("eliminated_product_ids_index must be None or a non-negative int.")

# validate market IDs, checking for duplicates
if market_ids is None:
self.market_ids = None
@@ -128,7 +142,11 @@ def __str__(self) -> str:
return f"{self.name}: {self.observations} Observations in {self._format_markets(text=True)}"

def _validate(self, economy: 'Economy') -> None:
"""Check that all market IDs associated with this dataset are in the economy."""
"""Check that all market IDs associated with this dataset are in the economy and that any eliminated product
IDs index is valid.
"""
if self.eliminated_product_ids_index is not None:
economy._validate_product_ids_index(self.eliminated_product_ids_index)
if self.market_ids is not None:
extra_ids = self.market_ids - set(economy.unique_market_ids)
if extra_ids:
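A usage sketch mirroring the new test fixtures: second choices for this dataset are computed after eliminating the entire group (column ``1`` of ``product_ids``) containing the first choice, rather than only the first choice itself; the observation count is illustrative.

import numpy as np
from pyblp import MicroDataset

dataset = MicroDataset(
    name="diversion from 1, grouped",
    observations=1000,  # illustrative
    compute_weights=lambda t, p, a: np.tile(
        p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)
    ),
    eliminated_product_ids_index=1,
)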
2 changes: 0 additions & 2 deletions pyblp/primitives.py
@@ -168,8 +168,6 @@ def __new__(
raise ValueError("The firm_ids field of product_data must be one-dimensional.")
if nesting_ids is not None and nesting_ids.shape[1] > 1:
raise ValueError("The nesting_ids field of product_data must be one-dimensional.")
if product_ids is not None and product_ids.shape[1] > 1:
raise ValueError("The product_ids field of product_data must be one-dimensional.")
if clustering_ids is not None:
if clustering_ids.shape[1] > 1:
raise ValueError("The clustering_ids field of product_data must be one-dimensional.")
10 changes: 8 additions & 2 deletions pyblp/results/economy_results.py
@@ -968,7 +968,7 @@ def compute_profit_hessians(

def compute_consumer_surpluses(
self, prices: Optional[Any] = None, keep_all: bool = False, eliminate_product_ids: Optional[Any] = None,
market_id: Optional[Any] = None) -> Array:
product_ids_index: int = 0, market_id: Optional[Any] = None) -> Array:
r"""Estimate population-normalized consumer surpluses, :math:`\text{CS}`.
Assuming away nonlinear income effects, the surplus in market :math:`t` is
@@ -1017,6 +1017,10 @@ def compute_consumer_surpluses(
IDs of the products to eliminate from the choice set. These IDs should show up in the ``product_ids`` field
of ``product_data`` in :class:`Problem`. Eliminating one or more products and comparing consumer surpluses
gives a measure of willingness to pay for these products.
product_ids_index : `int, optional`
Index between ``0`` and the number of columns in the ``product_ids`` field of ``product_data`` minus one,
inclusive, which determines which column of product IDs ``eliminate_product_ids`` refers to. By default, it
refers to the first column, which is index ``0``.
market_id : `object, optional`
ID of the market in which to compute consumer surplus. By default, consumer surpluses are computed in all
markets and stacked.
@@ -1036,11 +1040,13 @@
"""
output("Computing consumer surpluses with the equation that assumes away nonlinear income effects ...")
if eliminate_product_ids is not None:
self._economy._validate_product_ids_index(product_ids_index)
market_ids = self._select_market_ids(market_id)
prices = self._coerce_optional_prices(prices, market_ids)
return self._combine_arrays(
EconomyResultsMarket.safely_compute_consumer_surplus, market_ids,
fixed_args=[keep_all, eliminate_product_ids], market_args=[prices]
fixed_args=[keep_all, eliminate_product_ids, product_ids_index], market_args=[prices]
)


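A hedged usage sketch, assuming ``results`` is a solved :class:`ProblemResults`: willingness to pay for the group of products whose second ID column equals ``1`` is the drop in consumer surplus when that group is eliminated.

cs = results.compute_consumer_surpluses()
cs_without_group = results.compute_consumer_surpluses(
    eliminate_product_ids=[1], product_ids_index=1
)
willingness_to_pay = cs - cs_without_group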
100 changes: 79 additions & 21 deletions tests/conftest.py
@@ -302,7 +302,7 @@ def large_blp_simulation() -> SimulationFixture:
product_data={
'market_ids': id_data.market_ids,
'firm_ids': id_data.firm_ids,
'product_ids': product_ids,
'product_ids': np.c_[product_ids, np.mod(product_ids, 2)],
'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size)
},
beta=[1, 1, 2, 3, 1],
@@ -339,12 +339,19 @@ def large_blp_simulation() -> SimulationFixture:
]
}

inside_diversion_micro_dataset = MicroDataset(
inside_diversion_micro_dataset0 = MicroDataset(
name="diversion from 1",
observations=simulation.N,
compute_weights=lambda _, p, a: np.tile(p.product_ids == 1, (a.size, 1, 1 + p.size)),
compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)),
market_ids=simulation.unique_market_ids[6:10],
)
inside_diversion_micro_dataset1 = MicroDataset(
name="diversion from 1, grouped",
observations=simulation.N,
compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)),
market_ids=simulation.unique_market_ids[6:10],
eliminated_product_ids_index=1,
)
outside_diversion_micro_dataset = MicroDataset(
name="diversion from outside",
observations=simulation.N,
@@ -362,7 +369,7 @@ def large_blp_simulation() -> SimulationFixture:
dataset=MicroDataset(
name="product 0",
observations=simulation.N,
compute_weights=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1)),
compute_weights=lambda _, p, a: np.tile(p.product_ids[:, 0] == 0, (a.size, 1)),
),
compute_values=lambda _, p, a: np.tile(a.demographics[:, [1]], (1, p.size)),
),
@@ -376,7 +383,7 @@ def large_blp_simulation() -> SimulationFixture:
name="product 0 and outside",
observations=simulation.N,
compute_weights=lambda _, p, a: np.c_[
np.ones((a.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, 1))
np.ones((a.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, 1))
],
market_ids=simulation.unique_market_ids[1:4],
),
@@ -390,32 +397,54 @@ def large_blp_simulation() -> SimulationFixture:
value=0,
parts=MicroPart(
name="1 to 0 diversion ratio",
dataset=inside_diversion_micro_dataset,
dataset=inside_diversion_micro_dataset0,
compute_values=lambda _, p, a: np.concatenate(
[np.zeros((a.size, p.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, p.size, 1))], axis=2
[np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2
),
),
),
MicroMoment(
name="outside to 0 diversion ratio",
name="1 to outside diversion ratio",
value=0,
parts=MicroPart(
name="outside to 0 diversion ratio",
dataset=outside_diversion_micro_dataset,
compute_values=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1 + p.size, 1)),
name="1 to outside diversion ratio",
dataset=inside_diversion_micro_dataset0,
compute_values=lambda _, p, a: np.concatenate(
[np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2
),
),
),
MicroMoment(
name="1 to outside diversion ratio",
name="1 to 0 diversion ratio, grouped",
value=0,
parts=MicroPart(
name="1 to outside diversion ratio",
dataset=inside_diversion_micro_dataset,
name="1 to 0 diversion ratio, grouped",
dataset=inside_diversion_micro_dataset1,
compute_values=lambda _, p, a: np.concatenate(
[np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2
),
),
),
MicroMoment(
name="1 to outside diversion ratio, grouped",
value=0,
parts=MicroPart(
name="1 to outside diversion ratio, grouped",
dataset=inside_diversion_micro_dataset1,
compute_values=lambda _, p, a: np.concatenate(
[np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2
),
),
),
MicroMoment(
name="outside to 0 diversion ratio",
value=0,
parts=MicroPart(
name="outside to 0 diversion ratio",
dataset=outside_diversion_micro_dataset,
compute_values=lambda _, p, a: np.tile(p.product_ids[:, 0] == 0, (a.size, 1 + p.size, 1)),
),
),
MicroMoment(
name="unconditional diversion interaction",
value=0,
@@ -503,7 +532,7 @@ def large_nested_blp_simulation() -> SimulationFixture:
product_data={
'market_ids': id_data.market_ids,
'firm_ids': id_data.firm_ids,
'product_ids': product_ids,
'product_ids': np.c_[product_ids, np.mod(product_ids, 2)],
'nesting_ids': state.choice(['f', 'g', 'h'], id_data.size),
'clustering_ids': state.choice(range(30), id_data.size),
},
@@ -546,11 +575,18 @@ def large_nested_blp_simulation() -> SimulationFixture:
dataset=inside_micro_dataset,
compute_values=lambda _, p, a: np.tile((a.agent_ids == 2) | (a.agent_ids == 3), (1, p.size)),
)
inside_diversion_micro_dataset = MicroDataset(
inside_diversion_micro_dataset0 = MicroDataset(
name="diversion from 1",
observations=simulation.N,
compute_weights=lambda _, p, a: np.tile(p.product_ids == 1, (a.size, 1, 1 + p.size)),
compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)),
market_ids=[simulation.unique_market_ids[2]],
)
inside_diversion_micro_dataset1 = MicroDataset(
name="diversion from 1, grouped",
observations=simulation.N,
compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)),
market_ids=[simulation.unique_market_ids[2]],
eliminated_product_ids_index=1,
)
outside_diversion_micro_dataset = MicroDataset(
name="diversion from outside",
@@ -616,9 +652,9 @@ def large_nested_blp_simulation() -> SimulationFixture:
value=0,
parts=MicroPart(
name="1 to 0 diversion ratio",
dataset=inside_diversion_micro_dataset,
dataset=inside_diversion_micro_dataset0,
compute_values=lambda _, p, a: np.concatenate(
[np.zeros((a.size, p.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, p.size, 1))], axis=2
[np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2
),
),
),
@@ -627,7 +663,29 @@ def large_nested_blp_simulation() -> SimulationFixture:
value=0,
parts=MicroPart(
name="1 to outside diversion ratio",
dataset=inside_diversion_micro_dataset,
dataset=inside_diversion_micro_dataset0,
compute_values=lambda _, p, a: np.concatenate(
[np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2
),
),
),
MicroMoment(
name="1 to 0 diversion ratio, grouped",
value=0,
parts=MicroPart(
name="1 to 0 diversion ratio, grouped",
dataset=inside_diversion_micro_dataset1,
compute_values=lambda _, p, a: np.concatenate(
[np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2
),
),
),
MicroMoment(
name="1 to outside diversion ratio, grouped",
value=0,
parts=MicroPart(
name="1 to outside diversion ratio, grouped",
dataset=inside_diversion_micro_dataset1,
compute_values=lambda _, p, a: np.concatenate(
[np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2
),
Expand All @@ -639,7 +697,7 @@ def large_nested_blp_simulation() -> SimulationFixture:
parts=MicroPart(
name="outside to 0 diversion ratio",
dataset=outside_diversion_micro_dataset,
compute_values=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1 + p.size, 1)),
compute_values=lambda _, p, a: np.tile(p.product_ids[:, 0] == 0, (a.size, 1 + p.size, 1)),
),
),
])