Skip to content

Commit

Permalink
WIP models
Browse files Browse the repository at this point in the history
  • Loading branch information
lenhoanglnh committed Feb 17, 2025
1 parent c3fffea commit a5f46d7
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 48 deletions.
6 changes: 3 additions & 3 deletions solidago/src/solidago/modules/preference_learning/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __call__(self,
for user in users:
logger.info(f" Learning user {user}'s base model")
result[user] = self.user_learn(user, entities,
assessments.get(user), comparisons.get(user), user_models[user].base_model()[0])
assessments.get(user), comparisons.get(user), user_models[user].base_model())
return result

@abstractmethod
Expand All @@ -32,7 +32,7 @@ def user_learn(self,
entities: Entities,
assessments: Assessments, # key_names == ["criterion", "entity_name"]
comparisons: Comparisons, # key_names == ["criterion", "left_name", "right_name"]
base_model: BaseModel
) -> BaseModel:
base_model: ScoringModel
) -> ScoringModel:
"""Learns a scoring model, given user judgments of entities """
raise NotImplementedError
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def user_learn(self,
entities: Entities,
assessments: Assessments, # Not used
comparisons: Comparisons, # key_names == ["criterion", "left_name", "right_name"]
init_model: BaseModel,
init_model: ScoringModel,
) -> DirectScoring:
""" Learns only based on comparisons """
if self.last_comparison_only:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __call__(self, entities: Entities, user_models: UserModels) -> UserModels:
Will be scaled by the Scaling method
"""
scores = user_models.score(entities).reorder_keys(["criterion", "username", "entity_name"])
scales = ScaleDict(key_names=["criterion"]) # the same scale will apply to all users
translations = MultiScore()
for criterion in scores.get_set("criterion"):
scores_df = scores[criterion].to_df()
weights = 1 / scores_df.groupby("username").transform("size")
Expand All @@ -48,10 +48,10 @@ def __call__(self, entities: Entities, user_models: UserModels) -> UserModels:
right_uncertainties=np.array(scores_df["right_unc"], dtype=np.float64),
error=self.error,
) + self.target_score
scales[criterion] = (1, 0, 0, translation_value, 0, 0)
translations.set(criterion, translation_value)

return UserModels({
username: ScaledModel(model, scales, note="quantile_shift")
username: ScaledModel(model, translations=translations, note="quantile_shift")
for username, model in user_models
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(self, dev_quantile: float=0.9, lipschitz: float=0.1, error: float=1

def __call__(self, entities: Entities, user_models: UserModels) -> UserModels:
scores = user_models.score(entities).reorder_keys(["criterion", "username", "entity_name"])
scales = ScaleDict(key_names=["criterion"]) # the same scale will apply to all users
multiplicators = MultiScore()
for criterion in scores.get_set("criterion"):
scores_df = scores.to_df()
weights = 1 / scores_df.groupby("username").transform("size")
Expand All @@ -33,9 +33,9 @@ def __call__(self, entities: Entities, user_models: UserModels) -> UserModels:
default_dev=1.0,
error=self.error,
)
scales[criterion] = (1 / std_dev, 0, 0, 0, 0, 0)
multiplicators.set(criterion, 1 / std_dev)

return UserModels({
username: ScaledModel(model, scales, note="standardize")
username: ScaledModel(model, multiplicators=multiplicators, note="standardize")
for username, model in user_models
})
24 changes: 18 additions & 6 deletions solidago/src/solidago/modules/scaling/mehestan.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,17 @@ def __call__(self,
scores = user_models.score(entities).reorder_keys(["criterion", "username", "entity_name"])
logger.info(f"Mehestan 0. Terminated")

users, scales = self.compute_scales(users, scores, made_public)
users, multiplicators, translations = self.compute_scales(users, scores, made_public)
multiplicators = multiplicators.groupby(["username"])
translations = translations.groupby(["username"])

return users, UserModels({
username: ScaledModel(model, scales[username], note="Mehestan")
username: ScaledModel(
model,
multiplicators=multiplicators[username],
translations=translations[username],
note="Mehestan"
)
for username, model in user_models
})

Expand All @@ -105,7 +112,7 @@ def compute_scales(self,
users: Users, # Must have column "trust_score"
scores: MultiScore, # key_names == ["criterion", "username", "entity_name"]
made_public: MadePublic, # key_names == ["username", "entity_name"]
) -> ScaleDict: # key_names == ["username", "criterion"]
) -> tuple[Users, MultiScore, MultiScore]: # key_names == ["username", "criterion"]
""" Compute the scales for all criteria. This method should be inherited and parallelized
for heavy-load applications that can afford multi-core operations.
Expand All @@ -123,10 +130,15 @@ def compute_scales(self,
scales: ScaleDict
With key_names == ["username", "criterion"]
"""
scales = ScaleDict(key_names=["criterion", "username"])
multiplicators = MultiScore(key_names=["criterion", "username"])
translations = MultiScore(key_names=["criterion", "username"])
for criterion in scores.get_set("criterion"):
users, scales[criterion] = self.scale_criterion(users, scores[criterion], made_public, criterion)
return users, scales.reorder_keys(["username", "criterion"])
users, m, t = self.scale_criterion(users, scores[criterion], made_public, criterion)
m["criterion"] = criterion
t["criterion"] = criterion
multiplicators = multiplicators | m
translations = translations | t
return users, multiplicators, translations

def scale_criterion(self,
users: Users,
Expand Down
6 changes: 2 additions & 4 deletions solidago/src/solidago/state/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@
"Comparison", "Comparisons",
"VotingRights",
"Score", "MultiScore",
"ScoringModel", "BaseModel",
"UserModels",
"DirectScoring",
"ScaleDict", "ScaledModel",
"ScoringModel", "DirectScoring", "ScaledModel",
"PostProcessedModel", "SquashedModel",
"UserModels",
"State", "TournesolExport",
]
4 changes: 2 additions & 2 deletions solidago/src/solidago/state/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .score import Score, MultiScore
from .base import ScoringModel, BaseModel
from .base import ScoringModel
from .user_models import UserModels
from .direct import DirectScoring
from .scaled import ScaleDict, ScaledModel
from .scaled import ScaledModel
from .post_processed import PostProcessedModel, SquashedModel
21 changes: 14 additions & 7 deletions solidago/src/solidago/state/models/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class ScoringModel(ABC):
saved_argsnames: list[str]=["note"]

def __init__(self,
parent: Optional["ScoringModel"]=None,
dataframes: Optional[dict[str, DataFrame]]=None,
depth: int=0,
note: str="None",
Expand All @@ -34,15 +35,16 @@ def __init__(self,
if name == "directs":
self.dfs[name] = MultiScore(df, key_names=["entity_name", "criterion"])
elif name == "multiplicators" and isinstance(df, DataFrame):
self.dfs[name] = MultiScore(df, key_names=["depth", "criterion"]).groupby(["depth"])
self.dfs[name] = MultiScore(df, key_names=["depth", "criterion"])
elif name == "translations" and isinstance(df, DataFrame):
self.dfs[name] = MultiScore(df, key_names=["depth", "criterion"]).groupby(["depth"])
if "parent" in kwargs:
if isinstance(kwargs["parent"], ScoringModel):
self.parent = kwargs["parent"]
elif isinstance(kwargs["parent"], ScoringModel):
self.dfs[name] = MultiScore(df, key_names=["depth", "criterion"])
if parent is not None:
if isinstance(parent, ScoringModel):
parent.set_depth(depth + 1)
self.parent = parent
elif isinstance(parent, ScoringModel):
import solidago.state.models as models
parent_cls, parent_kwargs = kwargs["parent"]
parent_cls, parent_kwargs = parent
self.parent = getattr(models, parent_cls)(self.dfs, depth + 1, **parent_kwargs)
else:
raise ValueError(f"{kwargs['parent']} has unhandled type {type(kwargs['parent'])}")
Expand Down Expand Up @@ -82,6 +84,11 @@ def is_base(self) -> bool:
def evaluated_entities(self, entities: "Entities") -> "Entities":
return entities if self.is_base() else self.parent.evaluated_entities(entities)

def set_depth(self, depth: int) -> None:
self.depth = depth
if not self.is_base():
self.parent.set_depth(depth + 1)

def to_direct(self, entities: "Entities") -> "DirectScoring":
from .direct import DirectScoring
direct_scoring = DirectScoring()
Expand Down
2 changes: 1 addition & 1 deletion solidago/src/solidago/state/models/direct.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from pandas import DataFrame, Series

from .score import Score, MultiScore
from .base import ScoringModel, BaseModel
from .base import ScoringModel


class DirectScoring(ScoringModel):
Expand Down
35 changes: 27 additions & 8 deletions solidago/src/solidago/state/models/scaled.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,36 @@


class ScaledModel(ScoringModel):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.multiplicators = self.dfs["multiplicators"][str(self.depth)]
self.translations = self.dfs["translations"][str(self.depth)]
def __init__(self,
parent: ScoringModel,
dataframes: Optional[dict[str, DataFrame]]=None,
depth: int=0,
note: str="None",
multipliers: Optional[MultiScore]=None,
translations: Optional[MultiScore]=None,
*args,
**kwargs
):
super().__init__(parent=parent, *args, **kwargs)
if "multipliers" not in self.dfs:
self.dfs["multipliers"] = MultiScore(key_names=["depth", "criterion"])
if "translations" not in self.dfs:
self.dfs["translations"] = MultiScore(key_names=["depth", "criterion"])

@property
def multiplier(self) -> MultiScore:
return self.dfs["multipliers"].get(depth=self.depth)

@property
def translation(self) -> MultiScore:
return self.dfs["translation"].get(depth=self.depth)

def score(self, entity: "Entity") -> MultiScore:
return self.scale(self.parent.score(entity))

def scale(self, score: MultiScore) -> MultiScore:
return self.multiplicator * score + self.translation
return self.multiplier * score + self.translation

def rescale(self, multiplicator: Score, translation: Score) -> None:
self.multiplicator *= multiplicator
self.translation = multiplicator * self.translation + translation
def rescale(self, multiplier: Score, translation: Score) -> None:
self.multiplier *= multiplier
self.translation = multiplier * self.translation + translation
25 changes: 19 additions & 6 deletions solidago/src/solidago/state/models/score.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import math

from typing import Optional, Union, Any
from pandas import Series, DataFrame

from solidago.primitives.datastructure import UnnamedDataFrame

Expand Down Expand Up @@ -172,15 +173,27 @@ def input2dict(self, *args, keys_only: bool=False, **kwargs) -> dict:
key_value_columns = self.key_names if keys_only else (self.key_names + self.value_names)
if keys_only:
args = args[:len(self.key_names)]
assert len(args) <= len(key_value_columns) + 1
assert len(args) <= len(key_value_columns) + 3
assert all({ key not in key_value_columns[:len(args)] for key in kwargs })
f = lambda v, k: str(v) if k in self.key_names else v
kwargs = { k: f(v, k) for k, v in kwargs.items() if (not keys_only or k in self.key_names) }
args_key_names = [ kn for kn in self.key_names if kn not in kwargs ]
kwargs |= { k: f(v, k) for k, v in zip(args_key_names, args[:len(args_key_names)]) }
args_values = args[len(args_key_names):]
if len(args_values) > 0 and isinstance(args_values[0], Score):
assert "score" not in kwargs
kwargs["score"] = args_values[0]
elif len(args_values) > 0:
assert "score" not in kwargs
if len(args_values) == 1:
args_values = (args_values[0], 0, 0)
assert len(args_values) == 3, args
kwargs["score"] = Score(*args_values)
if "score" in kwargs:
kwargs["value"] = kwargs["score"].value
kwargs["left_unc"] = kwargs["score"].left_unc
kwargs["right_unc"] = kwargs["score"].right_unc
del kwargs["score"]
kwargs = { k: f(v, k) for k, v in kwargs.items() if (not keys_only or k in self.key_names) }
if not self.value_names and len(args) > len(self.key_names):
assert len(args) == len(self.key_names) + 1
return kwargs | args[-1].to_dict()
Expand All @@ -205,7 +218,7 @@ def __add__(self, other: Union[Score, "MultiScore"]) -> "MultiScore":
assert self.key_names == other.key_names
keys = set(self["criterion"]) & set(other["criterion"])
return MultiScore(
data=[ (*tuple(key), *(self[key] + other[key]).to_triplet() for key in keys ],
data=[ (*tuple(key), *(self[key] + other[key]).to_triplet()) for key in keys ],
key_names=self.key_names
)

Expand All @@ -218,7 +231,7 @@ def __sub__(self, other: Union[Score, "MultiScore"]) -> "MultiScore":
assert self.key_names == other.key_names
keys = set(self["criterion"]) & set(other["criterion"])
return MultiScore(
data=[ (*tuple(key), *(self[key] - other[key]).to_triplet() for key in keys ],
data=[ (*tuple(key), *(self[key] - other[key]).to_triplet()) for key in keys ],
key_names=self.key_names
)

Expand All @@ -231,7 +244,7 @@ def __mul__(self, other: Union[Score, "MultiScore"]) -> "MultiScore":
assert self.key_names == other.key_names
keys = set(self["criterion"]) & set(other["criterion"])
return MultiScore(
data=[ (*tuple(key), *(self[key] * other[key]).to_triplet() for key in keys ],
data=[ (*tuple(key), *(self[key] * other[key]).to_triplet()) for key in keys ],
key_names=self.key_names
)

Expand All @@ -244,6 +257,6 @@ def __truediv__(self, other: Union[Score, "MultiScore"]) -> "MultiScore":
assert self.key_names == other.key_names
keys = set(self["criterion"]) & set(other["criterion"])
return MultiScore(
data=[ (*tuple(key), *(self[key] / other[key]).to_triplet() for key in keys ],
data=[ (*tuple(key), *(self[key] / other[key]).to_triplet()) for key in keys ],
key_names=self.key_names
)
26 changes: 22 additions & 4 deletions solidago/src/solidago/state/models/user_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,31 @@
from .direct import DirectScoring


class UserModels(dict):
def __init__(self, *args, default_model_cls: type=DirectScoring, **kwargs):
class UserModels:
def __init__(self,
dataframes: Optional[dict[str, DataFrame]]=None,
default_model_cls: type=DirectScoring,
*args,
**kwargs
):
""" Maps usernames to ScoringModel objects.
Useful to export/import `glued` directs / scalings dataframes. """
super().__init__(*args, **kwargs)
self.dfs = dict() if dataframes is None else dataframes
for name, df in self.dfs.items():
if isinstance(df, (str, Path)):
df_filename = df
try: df = pd.read_csv(df_filename, keep_default_na=False)
except pd.errors.EmptyDataError: df = DataFrame()
self.dfs[name] = df
if name == "directs":
self.dfs[name] = MultiScore(df, key_names=["username", "entity_name", "criterion"])
elif name == "multiplicators" and isinstance(df, DataFrame):
self.dfs[name] = MultiScore(df, key_names=["username", "depth", "criterion"])
elif name == "translations" and isinstance(df, DataFrame):
self.dfs[name] = MultiScore(df, key_names=["username", "depth", "criterion"])
self.default_model_cls = default_model_cls

self._groups = None

def default_value(self) -> ScoringModel:
return self.default_model_cls()

Expand Down

0 comments on commit a5f46d7

Please sign in to comment.