From 15010d18703736b0e9767415309912e8fdffe26e Mon Sep 17 00:00:00 2001 From: Jeff Gortmaker Date: Sat, 25 Feb 2023 09:39:15 -0500 Subject: [PATCH] ENH: support elimination of groups of products for second choice data --- docs/background.rst | 4 +- pyblp/economies/economy.py | 10 ++ pyblp/economies/problem.py | 3 +- pyblp/economies/simulation.py | 3 +- pyblp/markets/economy_results_market.py | 6 +- pyblp/markets/market.py | 185 ++++++++++++++++-------- pyblp/micro.py | 20 ++- pyblp/primitives.py | 2 - pyblp/results/economy_results.py | 10 +- tests/conftest.py | 100 ++++++++++--- tests/test_blp.py | 6 +- 11 files changed, 255 insertions(+), 94 deletions(-) diff --git a/docs/background.rst b/docs/background.rst index 6866fb8..9f146f6 100644 --- a/docs/background.rst +++ b/docs/background.rst @@ -295,7 +295,9 @@ Micro moment parts based on second choice are averages over values :math:`v_{pij Its simulated analogue is -.. math:: v_p = \frac{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt} v_{pijkt}}{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt}}. +.. math:: v_p = \frac{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt} v_{pijkt}}{\sum_{t \in T} \sum_{i \in I_t} \sum_{j, k \in J_t \cup \{0\}} w_{it} s_{ijt} s_{ik(-j)t} w_{d_pijkt}}, + +in which :math:`s_{ik(-j)t}` is the probability of choosing :math:`k` when :math:`j` is removed from the choice set. One can also define micro moment parts based on second choices where a group of products :math:`h(j)` containing the first choice :math:`j` is removed from the choice set. In this case, the above second choice probabilities become :math:`s_{ik(-h(j))t}`. Covariances are defined analogously. diff --git a/pyblp/economies/economy.py b/pyblp/economies/economy.py index 14ee0e2..430b516 100644 --- a/pyblp/economies/economy.py +++ b/pyblp/economies/economy.py @@ -244,6 +244,16 @@ def _validate_name(self, name: Optional[str], none_valid: bool = True) -> None: if name not in names: raise NameError(f"'{name}' is not None or one of the underlying variables, {list(sorted(names))}.") + def _validate_product_ids_index(self, product_ids_index: int) -> None: + """Validate that a product IDs index is valid.""" + if not isinstance(product_ids_index, int) or product_ids_index < 0: + raise ValueError("The product IDs index must be a non-negative int.") + if self.products.product_ids.size == 0: + raise ValueError("Since the product IDs index is not None, product_data must have product_ids.") + max_index = self.products.product_ids.shape[1] - 1 + if not 0 <= product_ids_index <= max_index: + raise ValueError(f"The product IDs index should be at most {max_index}.") + def _coerce_optional_firm_ids(self, firm_ids: Optional[Any], market_ids: Optional[Array] = None) -> Array: """Coerce optional array-like firm IDs into a column vector and validate it. By default, assume that firm IDs are for all markets. diff --git a/pyblp/economies/problem.py b/pyblp/economies/problem.py index c6d6e03..4bc53b6 100644 --- a/pyblp/economies/problem.py +++ b/pyblp/economies/problem.py @@ -1279,7 +1279,8 @@ class Problem(ProblemEconomy): It may be convenient to define IDs for different products: - - **product_ids** (`object, optional`) - IDs that identify products within markets. + - **product_ids** (`object, optional`) - IDs that identify products within markets. There can be multiple + columns. 
Finally, clustering groups can be specified to account for within-group correlation while updating the weighting matrix and estimating standard errors: diff --git a/pyblp/economies/simulation.py b/pyblp/economies/simulation.py index 81acf07..1b80cf9 100644 --- a/pyblp/economies/simulation.py +++ b/pyblp/economies/simulation.py @@ -106,7 +106,8 @@ class Simulation(Economy): It may be convenient to define IDs for different products: - - **product_ids** (`object, optional`) - IDs that identify products within markets. + - **product_ids** (`object, optional`) - IDs that identify products within markets. There can be multiple + columns. To simulate a nested logit or random coefficients nested logit (RCNL) model, nesting groups must be specified: diff --git a/pyblp/markets/economy_results_market.py b/pyblp/markets/economy_results_market.py index b609755..8b44a9b 100644 --- a/pyblp/markets/economy_results_market.py +++ b/pyblp/markets/economy_results_market.py @@ -353,8 +353,8 @@ def safely_compute_profits( @NumericalErrorHandler(exceptions.PostEstimationNumericalError) def safely_compute_consumer_surplus( - self, keep_all: bool, eliminate_product_ids: Optional[Any], prices: Optional[Array]) -> ( - Tuple[Array, List[Error]]): + self, keep_all: bool, eliminate_product_ids: Optional[Any], product_ids_index: int, + prices: Optional[Array]) -> Tuple[Array, List[Error]]: """Estimate population-normalized consumer surplus or keep all individual-level surpluses. By default, use unchanged prices, handling any numerical errors. """ @@ -379,7 +379,7 @@ def safely_compute_consumer_surplus( # eliminate any products from the choice set if eliminate_product_ids is not None: - for j, product_id in enumerate(self.products.product_ids): + for j, product_id in enumerate(self.products.product_ids[:, product_ids_index]): if product_id in eliminate_product_ids: exp_utilities[j] = 0 diff --git a/pyblp/markets/market.py b/pyblp/markets/market.py index 2a0ff09..e6ed5ee 100644 --- a/pyblp/markets/market.py +++ b/pyblp/markets/market.py @@ -330,13 +330,16 @@ def compute_costs_derivatives( def compute_probabilities( self, delta: Array = None, mu: Optional[Array] = None, linear: bool = True, safe: bool = True, utility_reduction: Optional[Array] = None, numerator: Optional[Array] = None, - eliminate_outside: bool = False, eliminate_product: Optional[int] = None) -> Tuple[Array, Optional[Array]]: + eliminate_outside: bool = False, eliminate_product: Optional[int] = None, + eliminate_product_id: Optional[Any] = None, product_ids_index: Optional[int] = None) -> ( + Tuple[Array, Optional[Array]]): """Compute choice probabilities. By default, use unchanged delta and mu values. If linear is False, delta and mu must be specified and already be exponentiated. If safe is True, scale the logit equation by the exponential of negative the maximum utility for each agent, and if utility_reduction is specified, it should be values that have already been subtracted from the specified utility for each agent. If the numerator is specified, it will be used as the numerator in the non-nested logit expression. If any products are eliminated, eliminate the - outside option, an inside product, or both from the choice set. + outside option, an inside product, a group of products with the same product ID, or any of these from the + choice set. 
""" if delta is None: assert self.delta is not None @@ -385,6 +388,10 @@ def compute_probabilities( if eliminate_product is not None: exp_utilities[eliminate_product] = 0 + # optionally eliminate all products with the same product ID from the choice set + if eliminate_product_id is not None: + exp_utilities[self.products.product_ids[:, product_ids_index] == eliminate_product_id] = 0 + # compute standard probabilities if self.H == 0: if numerator is None: @@ -852,7 +859,8 @@ def compute_utility_derivatives_by_parameter_tangent( def compute_probabilities_by_parameter_tangent_mapping( self, probabilities: Array, conditionals: Optional[Array], delta: Optional[Array] = None, agent_indices: Optional[Array] = None, keep_conditionals: bool = True, - eliminate_product: Optional[int] = None) -> Tuple[Dict[int, Array], Dict[int, Optional[Array]]]: + eliminate_product: Optional[int] = None, eliminate_product_id: Optional[Any] = None, + product_ids_index: Optional[int] = None) -> Tuple[Dict[int, Array], Dict[int, Optional[Array]]]: """Computing a mapping from parameter index to tangent of probabilities with respect to a parameter. By default, use unchanged delta. By default, also compute conditionals derivatives if computed. """ @@ -860,7 +868,8 @@ def compute_probabilities_by_parameter_tangent_mapping( conditionals_mapping: Dict[int, Array] = {} for p, parameter in enumerate(self.parameters.unfixed): probabilities_mapping[p], conditionals_mapping[p] = self.compute_probabilities_by_parameter_tangent( - parameter, probabilities, conditionals, delta, agent_indices, eliminate_product + parameter, probabilities, conditionals, delta, agent_indices, eliminate_product, eliminate_product_id, + product_ids_index ) if not keep_conditionals: conditionals_mapping[p] = None @@ -908,7 +917,8 @@ def update_probabilities_by_parameter_tangent_mapping( def compute_probabilities_by_parameter_tangent( self, parameter: Parameter, probabilities: Array, conditionals: Optional[Array], delta: Optional[Array] = None, agent_indices: Optional[Array] = None, - eliminate_product: Optional[int] = None) -> Tuple[Array, Optional[Array]]: + eliminate_product: Optional[int] = None, eliminate_product_id: Optional[Any] = None, + product_ids_index: Optional[int] = None) -> Tuple[Array, Optional[Array]]: """Compute the tangent of probabilities with respect to a parameter. 
By default, use unchanged delta.""" if delta is None: assert self.delta is not None @@ -1006,6 +1016,10 @@ def compute_probabilities_by_parameter_tangent( if eliminate_product is not None: utilities[eliminate_product] = -np.inf + # handle any groups of eliminated products + if eliminate_product_id is not None: + utilities[self.products.product_ids[:, product_ids_index] == eliminate_product_id] = -np.inf + # compute the tangent of marginal probabilities with respect to the parameter (re-scale for robustness) utility_reduction = np.clip(utilities.max(axis=0, keepdims=True), 0, None) with np.errstate(divide='ignore', invalid='ignore'): @@ -1415,17 +1429,26 @@ def compute_micro_dataset_contributions( weights_mapping: Dict[MicroDataset, Array] = {} denominator_mapping: Dict[MicroDataset, Array] = {} outside_probabilities = None - eliminated_probabilities = outside_eliminated_probabilities = eliminated_outside_probabilities = None + eliminated_probabilities: Dict[Optional[int], Array] = {} + outside_eliminated_probabilities = None + eliminated_outside_probabilities: Dict[Optional[int], Array] = {} weights_tangent_mapping: Dict[Tuple[MicroDataset, int], Array] = {} denominator_tangent_mapping: Dict[Tuple[MicroDataset, int], Array] = {} outside_probabilities_tangent_mapping: Dict[int, Array] = {} - eliminated_probabilities_tangent_mapping: Dict[int, Array] = {} + eliminated_probabilities_tangent_mapping: Dict[Optional[int], Dict[int, Array]] = {} outside_eliminated_probabilities_tangent_mapping: Dict[int, Array] = {} - eliminated_outside_probabilities_tangent_mapping: Dict[int, Array] = {} + eliminated_outside_probabilities_tangent_mapping: Dict[Optional[int], Dict[int, Array]] = {} for dataset in datasets: if dataset in weights_mapping or (dataset.market_ids is not None and self.t not in dataset.market_ids): continue + # compute unique products IDs if necessary for second choice probabilities + ids_index = dataset.eliminated_product_ids_index + product_ids = unique_product_ids = None + if ids_index is not None: + product_ids = self.products.product_ids[:, ids_index] + unique_product_ids = np.unique(product_ids) + # compute and validate weights weights = self.compute_micro_weights(dataset, agent_indices) @@ -1446,66 +1469,112 @@ def compute_micro_dataset_contributions( # pre-compute second choice probabilities need_eliminated_probabilities = len(weights.shape) == 3 - if need_eliminated_probabilities and eliminated_probabilities is None: + if need_eliminated_probabilities and ids_index not in eliminated_probabilities: eliminated_probabilities_list = [] eliminated_conditionals_list = [] - for j in range(self.J): - # use a fast analytic trick for non-nested probabilities - eliminated_probabilities_j = eliminated_conditionals_j = None - if self.H == 0: - with np.errstate(all='ignore'): - eliminated_probabilities_j = probabilities / (1 - probabilities[j][None]) - eliminated_probabilities_j[j] = 0 + if ids_index is None: + # eliminate each product separately + for j in range(self.J): + # use a fast analytic trick when possible + eliminated_probabilities_j = eliminated_conditionals_j = None + if self.H == 0: + with np.errstate(all='ignore'): + eliminated_probabilities_j = probabilities / (1 - probabilities[j][None]) + eliminated_probabilities_j[j] = 0 + + # re-compute probabilities if there is nesting or there was a numerical error + if eliminated_probabilities_j is None or not np.isfinite(eliminated_probabilities_j).all(): + eliminated_probabilities_j, eliminated_conditionals_j = 
self.compute_probabilities( + delta, mu, eliminate_product=j + ) - # re-compute probabilities if there is nesting or there was a numerical error - if eliminated_probabilities_j is None or not np.isfinite(eliminated_probabilities_j).all(): + eliminated_probabilities_list.append(eliminated_probabilities_j) + eliminated_conditionals_list.append(eliminated_conditionals_j) + + eliminated_probabilities[ids_index] = np.stack(eliminated_probabilities_list) + else: + assert unique_product_ids is not None + + # eliminate each product ID separately + for product_id in unique_product_ids: eliminated_probabilities_j, eliminated_conditionals_j = self.compute_probabilities( - delta, mu, eliminate_product=j + delta, mu, eliminate_product_id=product_id, product_ids_index=ids_index ) + eliminated_probabilities_list.append(eliminated_probabilities_j) + eliminated_conditionals_list.append(eliminated_conditionals_j) - eliminated_probabilities_list.append(eliminated_probabilities_j) - eliminated_conditionals_list.append(eliminated_conditionals_j) - - eliminated_probabilities = np.stack(eliminated_probabilities_list) + eliminated_probabilities[ids_index] = np.stack( + [eliminated_probabilities_list[list(unique_product_ids).index(i)] for i in product_ids] + ) if compute_jacobians: - # use a fast analytic trick for non-nested probabilities - if self.H == 0: + eliminated_probabilities_tangent_mapping[ids_index] = {} + + # use a fast analytic trick when possible + if self.H == 0 and ids_index is None: with np.errstate(all='ignore'): - eliminated_ratios = eliminated_probabilities / probabilities[None] + eliminated_ratios = eliminated_probabilities[ids_index] / probabilities[None] eliminated_ratios[~np.isfinite(eliminated_ratios)] = 0 assert probabilities_tangent_mapping is not None for p, probabilities_tangent in probabilities_tangent_mapping.items(): - eliminated_probabilities_tangent_mapping[p] = eliminated_ratios * ( - probabilities_tangent[None] + eliminated_probabilities * probabilities_tangent[:, None] + eliminated_probabilities_tangent_mapping[ids_index][p] = eliminated_ratios * ( + probabilities_tangent[None] + + eliminated_probabilities[ids_index] * probabilities_tangent[:, None] ) else: assert xi_jacobian is not None # compute from scratch for nested probabilities eliminated_probabilities_tangent_mappings: Any = {} - for j in range(self.J): - eliminated_probabilities_tangent_mappings[j], conditionals_tangent_mapping = ( - self.compute_probabilities_by_parameter_tangent_mapping( - eliminated_probabilities[j], eliminated_conditionals_list[j], delta, agent_indices, - eliminate_product=j, keep_conditionals=False + if ids_index is None: + # eliminate each product separately + for j in range(self.J): + eliminated_probabilities_tangent_mappings[j], conditionals_tangent_mapping = ( + self.compute_probabilities_by_parameter_tangent_mapping( + eliminated_probabilities[ids_index][j], eliminated_conditionals_list[j], delta, + agent_indices, eliminate_product=j, keep_conditionals=False + ) + ) + self.update_probabilities_by_parameter_tangent_mapping( + eliminated_probabilities_tangent_mappings[j], conditionals_tangent_mapping, + eliminated_probabilities[ids_index][j], eliminated_conditionals_list[j], + xi_jacobian ) - ) - self.update_probabilities_by_parameter_tangent_mapping( - eliminated_probabilities_tangent_mappings[j], conditionals_tangent_mapping, - eliminated_probabilities[j], eliminated_conditionals_list[j], xi_jacobian - ) - for p in range(self.parameters.P): - eliminated_probabilities_tangent_mapping[p] = 
np.stack( - [eliminated_probabilities_tangent_mappings[j][p] for j in range(self.J)] - ) + for p in range(self.parameters.P): + eliminated_probabilities_tangent_mapping[ids_index][p] = np.stack( + [eliminated_probabilities_tangent_mappings[j][p] for j in range(self.J)] + ) + else: + assert unique_product_ids is not None + + # eliminate each product ID separately + for id_index, product_id in enumerate(unique_product_ids): + j = next(k for k, i in enumerate(product_ids) if i == product_id) + eliminated_probabilities_tangent_mappings[product_id], conditionals_tangent_mapping = ( + self.compute_probabilities_by_parameter_tangent_mapping( + eliminated_probabilities[ids_index][j], + eliminated_conditionals_list[id_index], delta, agent_indices, + eliminate_product_id=product_id, product_ids_index=ids_index, + keep_conditionals=False + ) + ) + self.update_probabilities_by_parameter_tangent_mapping( + eliminated_probabilities_tangent_mappings[product_id], conditionals_tangent_mapping, + eliminated_probabilities[ids_index][j], eliminated_conditionals_list[id_index], + xi_jacobian + ) + + for p in range(self.parameters.P): + eliminated_probabilities_tangent_mapping[ids_index][p] = np.stack( + [eliminated_probabilities_tangent_mappings[i][p] for i in product_ids] + ) # pre-compute probabilities after the outside option has been removed need_outside_eliminated_probabilities = len(weights.shape) == 3 and weights.shape[1] == 1 + self.J if need_outside_eliminated_probabilities and outside_eliminated_probabilities is None: - # use a fast analytic trick for non-nested probabilities + # use a fast analytic trick when possible outside_eliminated_probabilities = outside_eliminated_conditionals = None if self.H == 0: with np.errstate(all='ignore'): @@ -1518,7 +1587,7 @@ def compute_micro_dataset_contributions( ) if compute_jacobians: - # use a fast analytic trick for non-nested probabilities + # use a fast analytic trick when possible if self.H == 0: with np.errstate(all='ignore'): outside_eliminated_ratio = outside_eliminated_probabilities / probabilities @@ -1547,15 +1616,14 @@ def compute_micro_dataset_contributions( # pre-compute outside second choice probabilities need_eliminated_outside_probabilities = len(weights.shape) == 3 and weights.shape[2] == 1 + self.J - if need_eliminated_outside_probabilities and eliminated_outside_probabilities is None: - assert eliminated_probabilities is not None - eliminated_outside_probabilities = 1 - eliminated_probabilities.sum(axis=1) + if need_eliminated_outside_probabilities and ids_index not in eliminated_outside_probabilities: + assert ids_index in eliminated_probabilities + eliminated_outside_probabilities[ids_index] = 1 - eliminated_probabilities[ids_index].sum(axis=1) if compute_jacobians: - for p, eliminated_probabilities_tangent in eliminated_probabilities_tangent_mapping.items(): - eliminated_outside_probabilities_tangent_mapping[p] = -( - eliminated_probabilities_tangent.sum(axis=1) - ) + eliminated_outside_probabilities_tangent_mapping[ids_index] = {} + for p, tangent in eliminated_probabilities_tangent_mapping[ids_index].items(): + eliminated_outside_probabilities_tangent_mapping[ids_index][p] = -tangent.sum(axis=1) # both weights and their Jacobians will be multiplied by agent integration weights agent_weights = self.agents.weights if agent_indices is None else self.agents.weights[agent_indices] @@ -1574,17 +1642,17 @@ def compute_micro_dataset_contributions( dataset_weights[:, 0] *= outside_probabilities else: assert len(weights.shape) == 3 - assert 
eliminated_probabilities is not None + assert ids_index in eliminated_probabilities product = np.zeros_like(weights) - product[:, -self.J:, -self.J:] += np.moveaxis(eliminated_probabilities, (0, 1, 2), (1, 2, 0)) + product[:, -self.J:, -self.J:] += np.moveaxis(eliminated_probabilities[ids_index], (0, 1, 2), (1, 2, 0)) product[:, :, -self.J:] -= probabilities.T[:, None] product[:, -self.J:, -self.J:][:, np.arange(self.J), np.arange(self.J)] = 0 if weights.shape[1] == 1 + self.J: assert outside_eliminated_probabilities is not None product[:, 0, -self.J:] += outside_eliminated_probabilities.T if weights.shape[2] == 1 + self.J: - assert eliminated_outside_probabilities is not None and outside_probabilities is not None - product[:, -self.J:, 0] += eliminated_outside_probabilities.T + assert ids_index in eliminated_outside_probabilities and outside_probabilities is not None + product[:, -self.J:, 0] += eliminated_outside_probabilities[ids_index].T product[:, :, 0] -= outside_probabilities[:, None] if weights.shape[1] == weights.shape[2] == 1 + self.J: product[:, 0, 0] = 0 @@ -1608,19 +1676,16 @@ def compute_micro_dataset_contributions( weights_tangent[:, 0] *= outside_probabilities_tangent_mapping[p] else: assert len(weights.shape) == 3 - assert eliminated_probabilities is not None product = np.zeros_like(weights_tangent) product[:, -self.J:, -self.J:] += np.moveaxis( - eliminated_probabilities_tangent_mapping[p], (0, 1, 2), (1, 2, 0) + eliminated_probabilities_tangent_mapping[ids_index][p], (0, 1, 2), (1, 2, 0) ) product[:, :, -self.J:] -= probabilities_tangent_mapping[p].T[:, None] product[:, -self.J:, -self.J:][:, np.arange(self.J), np.arange(self.J)] = 0 if weights.shape[1] == 1 + self.J: - assert outside_eliminated_probabilities is not None product[:, 0, -self.J:] += outside_eliminated_probabilities_tangent_mapping[p].T if weights.shape[2] == 1 + self.J: - assert eliminated_outside_probabilities is not None and outside_probabilities is not None - product[:, -self.J:, 0] += eliminated_outside_probabilities_tangent_mapping[p].T + product[:, -self.J:, 0] += eliminated_outside_probabilities_tangent_mapping[ids_index][p].T product[:, :, 0] -= outside_probabilities_tangent_mapping[p][:, None] if weights.shape[1] == weights.shape[2] == 1 + self.J: product[:, 0, 0] = 0 diff --git a/pyblp/micro.py b/pyblp/micro.py index 44a050e..12a7e59 100644 --- a/pyblp/micro.py +++ b/pyblp/micro.py @@ -80,6 +80,12 @@ class MicroDataset(StringRepresentation): issue, consider setting ``pyblp.options.micro_computation_chunks`` to a value higher than its default of ``1``, such as the highest :math:`J_t`. This will cut down on memory usage without much affecting speed. + eliminated_product_ids_index : `int, optional` + This option determines whether the dataset's second choices are after only the first choice product :math:`j` is + eliminated from the choice set, in which case this should be ``None``, the default, or if a group of products + including the first choice product is eliminated, in which case this should be a number between ``0`` and the + number of columns in the ``product_ids`` field of ``product_data`` minus one, inclusive. The column of + ``product_ids`` determines the groups. market_ids : `array-like, optional` Distinct market IDs with nonzero survey weights :math:`w_{dijt}`. For other markets, :math:`w_{dijt} = 0`, and ``compute_weights`` will not be called. 
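As a rough usage sketch of the new ``eliminated_product_ids_index`` option (mirroring the fixtures added in tests/conftest.py below), a micro dataset whose second choices eliminate an entire group of products rather than just the first choice might be constructed as follows; the observation count and the ID value ``1`` are illustrative, not part of this patch:

    import numpy as np
    from pyblp import MicroDataset

    # illustrative dataset: first choices are products whose first ID column equals 1, and
    # second choices are computed after eliminating every product that shares the first
    # choice's value in the second ID column (column index 1 of product_ids)
    grouped_diversion_dataset = MicroDataset(
        name="diversion from 1, grouped",
        observations=10_000,  # illustrative survey size
        compute_weights=lambda t, products, agents: np.tile(
            products.product_ids[:, [0]] == 1, (agents.size, 1, 1 + products.size)
        ),
        eliminated_product_ids_index=1,  # groups are defined by the second column of product_ids
    )
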
@@ -94,9 +100,11 @@ class MicroDataset(StringRepresentation): observations: int compute_weights: functools.partial market_ids: Optional[Set] + eliminated_product_ids_index: Optional[int] def __init__( self, name: str, observations: int, compute_weights: Callable, + eliminated_product_ids_index: Optional[int] = None, market_ids: Optional[Union[Sequence, Array]] = None) -> None: """Validate information to the greatest extent possible without an economy or calling the function.""" if not isinstance(name, str): @@ -110,6 +118,12 @@ def __init__( self.observations = observations self.compute_weights = functools.partial(compute_weights) + # validate the product IDs index + self.eliminated_product_ids_index = eliminated_product_ids_index + if eliminated_product_ids_index is not None: + if not isinstance(eliminated_product_ids_index, int) or eliminated_product_ids_index < 0: + raise ValueError("eliminated_product_ids_index must be None or a non-negative int.") + # validate market IDs, checking for duplicates if market_ids is None: self.market_ids = None @@ -128,7 +142,11 @@ def __str__(self) -> str: return f"{self.name}: {self.observations} Observations in {self._format_markets(text=True)}" def _validate(self, economy: 'Economy') -> None: - """Check that all market IDs associated with this dataset are in the economy.""" + """Check that all market IDs associated with this dataset are in the economy and that any eliminated product + IDs index is valid. + """ + if self.eliminated_product_ids_index is not None: + economy._validate_product_ids_index(self.eliminated_product_ids_index) if self.market_ids is not None: extra_ids = self.market_ids - set(economy.unique_market_ids) if extra_ids: diff --git a/pyblp/primitives.py b/pyblp/primitives.py index 349ca3b..e61d571 100644 --- a/pyblp/primitives.py +++ b/pyblp/primitives.py @@ -168,8 +168,6 @@ def __new__( raise ValueError("The firm_ids field of product_data must be one-dimensional.") if nesting_ids is not None and nesting_ids.shape[1] > 1: raise ValueError("The nesting_ids field of product_data must be one-dimensional.") - if product_ids is not None and product_ids.shape[1] > 1: - raise ValueError("The product_ids field of product_data must be one-dimensional.") if clustering_ids is not None: if clustering_ids.shape[1] > 1: raise ValueError("The clustering_ids field of product_data must be one-dimensional.") diff --git a/pyblp/results/economy_results.py b/pyblp/results/economy_results.py index c96536a..e6c3420 100644 --- a/pyblp/results/economy_results.py +++ b/pyblp/results/economy_results.py @@ -968,7 +968,7 @@ def compute_profit_hessians( def compute_consumer_surpluses( self, prices: Optional[Any] = None, keep_all: bool = False, eliminate_product_ids: Optional[Any] = None, - market_id: Optional[Any] = None) -> Array: + product_ids_index: int = 0, market_id: Optional[Any] = None) -> Array: r"""Estimate population-normalized consumer surpluses, :math:`\text{CS}`. Assuming away nonlinear income effects, the surplus in market :math:`t` is @@ -1017,6 +1017,10 @@ def compute_consumer_surpluses( IDs of the products to eliminate from the choice set. These IDs should show up in the ``product_ids`` field of ``product_data`` in :class:`Problem`. Eliminating one or more products and comparing consumer surpluses gives a measure of willingness to pay for these products. 
+ product_ids_index : `int, optional` + Index between ``0`` and the number of columns in the ``product_ids`` field of ``product_data`` minus one, + inclusive, which determines which column of product IDs ``eliminate_product_ids`` refers to. By default, it + refers to the first column, which is index ``0``. market_id : `object, optional` ID of the market in which to compute consumer surplus. By default, consumer surpluses are computed in all markets and stacked. @@ -1036,11 +1040,13 @@ def compute_consumer_surpluses( """ output("Computing consumer surpluses with the equation that assumes away nonlinear income effects ...") + if eliminate_product_ids is not None: + self._economy._validate_product_ids_index(product_ids_index) market_ids = self._select_market_ids(market_id) prices = self._coerce_optional_prices(prices, market_ids) return self._combine_arrays( EconomyResultsMarket.safely_compute_consumer_surplus, market_ids, - fixed_args=[keep_all, eliminate_product_ids], market_args=[prices] + fixed_args=[keep_all, eliminate_product_ids, product_ids_index], market_args=[prices] ) diff --git a/tests/conftest.py b/tests/conftest.py index 5127af5..8bfff57 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -302,7 +302,7 @@ def large_blp_simulation() -> SimulationFixture: product_data={ 'market_ids': id_data.market_ids, 'firm_ids': id_data.firm_ids, - 'product_ids': product_ids, + 'product_ids': np.c_[product_ids, np.mod(product_ids, 2)], 'clustering_ids': np.random.RandomState(2).choice(range(30), id_data.size) }, beta=[1, 1, 2, 3, 1], @@ -339,12 +339,19 @@ def large_blp_simulation() -> SimulationFixture: ] } - inside_diversion_micro_dataset = MicroDataset( + inside_diversion_micro_dataset0 = MicroDataset( name="diversion from 1", observations=simulation.N, - compute_weights=lambda _, p, a: np.tile(p.product_ids == 1, (a.size, 1, 1 + p.size)), + compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)), market_ids=simulation.unique_market_ids[6:10], ) + inside_diversion_micro_dataset1 = MicroDataset( + name="diversion from 1, grouped", + observations=simulation.N, + compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)), + market_ids=simulation.unique_market_ids[6:10], + eliminated_product_ids_index=1, + ) outside_diversion_micro_dataset = MicroDataset( name="diversion from outside", observations=simulation.N, @@ -362,7 +369,7 @@ def large_blp_simulation() -> SimulationFixture: dataset=MicroDataset( name="product 0", observations=simulation.N, - compute_weights=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1)), + compute_weights=lambda _, p, a: np.tile(p.product_ids[:, 0] == 0, (a.size, 1)), ), compute_values=lambda _, p, a: np.tile(a.demographics[:, [1]], (1, p.size)), ), @@ -376,7 +383,7 @@ def large_blp_simulation() -> SimulationFixture: name="product 0 and outside", observations=simulation.N, compute_weights=lambda _, p, a: np.c_[ - np.ones((a.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, 1)) + np.ones((a.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, 1)) ], market_ids=simulation.unique_market_ids[1:4], ), @@ -390,32 +397,54 @@ def large_blp_simulation() -> SimulationFixture: value=0, parts=MicroPart( name="1 to 0 diversion ratio", - dataset=inside_diversion_micro_dataset, + dataset=inside_diversion_micro_dataset0, compute_values=lambda _, p, a: np.concatenate( - [np.zeros((a.size, p.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, p.size, 1))], axis=2 + [np.zeros((a.size, p.size, 1)), 
np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2 ), ), ), MicroMoment( - name="outside to 0 diversion ratio", + name="1 to outside diversion ratio", value=0, parts=MicroPart( - name="outside to 0 diversion ratio", - dataset=outside_diversion_micro_dataset, - compute_values=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1 + p.size, 1)), + name="1 to outside diversion ratio", + dataset=inside_diversion_micro_dataset0, + compute_values=lambda _, p, a: np.concatenate( + [np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2 + ), ), ), MicroMoment( - name="1 to outside diversion ratio", + name="1 to 0 diversion ratio, grouped", value=0, parts=MicroPart( - name="1 to outside diversion ratio", - dataset=inside_diversion_micro_dataset, + name="1 to 0 diversion ratio, grouped", + dataset=inside_diversion_micro_dataset1, + compute_values=lambda _, p, a: np.concatenate( + [np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2 + ), + ), + ), + MicroMoment( + name="1 to outside diversion ratio, grouped", + value=0, + parts=MicroPart( + name="1 to outside diversion ratio, grouped", + dataset=inside_diversion_micro_dataset1, compute_values=lambda _, p, a: np.concatenate( [np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2 ), ), ), + MicroMoment( + name="outside to 0 diversion ratio", + value=0, + parts=MicroPart( + name="outside to 0 diversion ratio", + dataset=outside_diversion_micro_dataset, + compute_values=lambda _, p, a: np.tile(p.product_ids[:, 0] == 0, (a.size, 1 + p.size, 1)), + ), + ), MicroMoment( name="unconditional diversion interaction", value=0, @@ -503,7 +532,7 @@ def large_nested_blp_simulation() -> SimulationFixture: product_data={ 'market_ids': id_data.market_ids, 'firm_ids': id_data.firm_ids, - 'product_ids': product_ids, + 'product_ids': np.c_[product_ids, np.mod(product_ids, 2)], 'nesting_ids': state.choice(['f', 'g', 'h'], id_data.size), 'clustering_ids': state.choice(range(30), id_data.size), }, @@ -546,11 +575,18 @@ def large_nested_blp_simulation() -> SimulationFixture: dataset=inside_micro_dataset, compute_values=lambda _, p, a: np.tile((a.agent_ids == 2) | (a.agent_ids == 3), (1, p.size)), ) - inside_diversion_micro_dataset = MicroDataset( + inside_diversion_micro_dataset0 = MicroDataset( name="diversion from 1", observations=simulation.N, - compute_weights=lambda _, p, a: np.tile(p.product_ids == 1, (a.size, 1, 1 + p.size)), + compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)), + market_ids=[simulation.unique_market_ids[2]], + ) + inside_diversion_micro_dataset1 = MicroDataset( + name="diversion from 1, grouped", + observations=simulation.N, + compute_weights=lambda _, p, a: np.tile(p.product_ids[:, [0]] == 1, (a.size, 1, 1 + p.size)), market_ids=[simulation.unique_market_ids[2]], + eliminated_product_ids_index=1, ) outside_diversion_micro_dataset = MicroDataset( name="diversion from outside", @@ -616,9 +652,9 @@ def large_nested_blp_simulation() -> SimulationFixture: value=0, parts=MicroPart( name="1 to 0 diversion ratio", - dataset=inside_diversion_micro_dataset, + dataset=inside_diversion_micro_dataset0, compute_values=lambda _, p, a: np.concatenate( - [np.zeros((a.size, p.size, 1)), np.tile(p.product_ids.flat == 0, (a.size, p.size, 1))], axis=2 + [np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2 ), ), ), @@ -627,7 +663,29 @@ def large_nested_blp_simulation() -> 
SimulationFixture: value=0, parts=MicroPart( name="1 to outside diversion ratio", - dataset=inside_diversion_micro_dataset, + dataset=inside_diversion_micro_dataset0, + compute_values=lambda _, p, a: np.concatenate( + [np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2 + ), + ), + ), + MicroMoment( + name="1 to 0 diversion ratio, grouped", + value=0, + parts=MicroPart( + name="1 to 0 diversion ratio, grouped", + dataset=inside_diversion_micro_dataset1, + compute_values=lambda _, p, a: np.concatenate( + [np.zeros((a.size, p.size, 1)), np.tile(p.product_ids[:, 0] == 0, (a.size, p.size, 1))], axis=2 + ), + ), + ), + MicroMoment( + name="1 to outside diversion ratio, grouped", + value=0, + parts=MicroPart( + name="1 to outside diversion ratio, grouped", + dataset=inside_diversion_micro_dataset1, compute_values=lambda _, p, a: np.concatenate( [np.ones((a.size, p.size, 1)), np.zeros((a.size, p.size, p.size))], axis=2 ), @@ -639,7 +697,7 @@ def large_nested_blp_simulation() -> SimulationFixture: parts=MicroPart( name="outside to 0 diversion ratio", dataset=outside_diversion_micro_dataset, - compute_values=lambda _, p, a: np.tile(p.product_ids.flat == 0, (a.size, 1 + p.size, 1)), + compute_values=lambda _, p, a: np.tile(p.product_ids[:, 0] == 0, (a.size, 1 + p.size, 1)), ), ), ]) diff --git a/tests/test_blp.py b/tests/test_blp.py index 116048a..eefa2b2 100644 --- a/tests/test_blp.py +++ b/tests/test_blp.py @@ -981,8 +981,8 @@ def test_result_positivity(simulated_problem: SimulatedProblemFixture) -> None: test_positive(results.compute_consumer_surpluses(changed_prices, market_id=t)) # compute willingness to pay when the simulation has product IDs and test its positivity - if simulation.products.product_ids.size > 0: - unique_product_ids = np.unique(simulation.products.product_ids[simulation.products.market_ids == t]) + if simulation.products.product_ids[:, 0].size > 0: + unique_product_ids = np.unique(simulation.products.product_ids[simulation.products.market_ids.flat == t, 0]) eliminate0 = results.compute_consumer_surpluses(market_id=t) eliminate1 = results.compute_consumer_surpluses(market_id=t, eliminate_product_ids=unique_product_ids[:1]) eliminate2 = results.compute_consumer_surpluses(market_id=t, eliminate_product_ids=unique_product_ids[:2]) @@ -1467,6 +1467,7 @@ def test_micro_values(simulated_problem: SimulatedProblemFixture) -> None: observations=1_000_000, compute_weights=part.dataset.compute_weights, market_ids=part.dataset.market_ids, + eliminated_product_ids_index=part.dataset.eliminated_product_ids_index, ) micro_data = simulation_results.simulate_micro_data(dataset, seed=0) @@ -1563,6 +1564,7 @@ def test_micro_scores(simulated_problem: SimulatedProblemFixture) -> None: observations=2_000, compute_weights=dataset.compute_weights, market_ids=dataset.market_ids, + eliminated_product_ids_index=dataset.eliminated_product_ids_index, ) data = data_to_dict(results.simulate_micro_data(dataset, seed=0))
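
Finally, a hedged sketch of the new ``product_ids_index`` argument to ``compute_consumer_surpluses`` documented above; ``results`` (an estimated ``ProblemResults``), ``t``, and ``group_id`` are placeholder names rather than part of this patch, and ``product_data`` is assumed to have at least two ``product_ids`` columns:

    # willingness to pay for the group of products whose second ID column equals group_id
    baseline = results.compute_consumer_surpluses(market_id=t)
    counterfactual = results.compute_consumer_surpluses(
        market_id=t,
        eliminate_product_ids=[group_id],  # IDs drawn from the chosen column
        product_ids_index=1,               # refer to the second column of product_ids
    )
    willingness_to_pay = baseline - counterfactual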