Experiment with hqcases removal #35766

Open · wants to merge 16 commits into base: master
2 changes: 2 additions & 0 deletions corehq/apps/README.rst
@@ -36,6 +36,8 @@ cloudcare
 domain
     Domains, called "project spaces" in user-facing content, are the core sandboxes of CommCare. Almost
     all features and UIs in CommCare are in the context of a domain.
+experiments
+    Framework for comparing old and new code paths.
 export
     Configurations for exporting project data, typically forms or cases, to an excel download.
 hqmedia
33 changes: 23 additions & 10 deletions corehq/apps/data_pipeline_audit/dbacessors.py
@@ -6,33 +6,45 @@

 from corehq.apps import es
 from corehq.apps.es import aggregations
+from corehq.apps.experiments import Experiment, RM_HQCASES
 from corehq.form_processor.backends.sql.dbaccessors import doc_type_to_state
 from corehq.form_processor.models import CommCareCase, XFormInstance
 from corehq.sql_db.util import get_db_aliases_for_partitioned_query

+experiment = Experiment(
+    campaign=RM_HQCASES,
+    old_args={},
+    new_args={"escls": es.CaseSearchES},
+)
+
-def get_es_counts_by_doc_type(domain, es_indices=None, extra_filters=None):
-    es_indices = es_indices or (es.CaseES, es.FormES, es.UserES, es.AppES, es.GroupES)
+
+@experiment
+def get_es_counts_by_doc_type(domain, es_indices=None, extra_filters=None, escls=es.CaseES):
+    if es_indices and escls is not es.CaseES:
+        es_indices = [(escls if x is es.CaseES else x) for x in es_indices]
+    es_indices = es_indices or (escls, es.FormES, es.UserES, es.AppES, es.GroupES)
     counter = Counter()
     for es_query in es_indices:
-        counter += get_index_counts_by_domain_doc_type(es_query, domain, extra_filters)
+        counter += _get_index_counts_by_domain_doc_type(es_query, domain, extra_filters)

     return counter


-def get_es_case_counts(domain, doc_type, gte, lt):
+@experiment(is_equal=lambda a, b: abs(a - b) < 5)
+def get_es_case_counts(domain, doc_type, gte, lt, escls=es.CaseES):
     ex = es.cases.server_modified_range(gte=gte, lt=lt)
-    return es.cases.CaseES().domain(domain).filter(ex).filter(
+    return escls().domain(domain).filter(ex).filter(
         es.filters.OR(
             es.filters.doc_type(doc_type),
             es.filters.doc_type(doc_type.lower()),
         )
     ).count()


-def get_es_case_range(domain):
+@experiment
+def get_es_case_range(domain, escls=es.CaseES):
     def descending_query(order):
-        result = es.CaseES().domain(domain).sort(
+        result = escls().domain(domain).sort(
             'server_modified_on', desc=order).size(1).run().raw_hits
         if len(result) == 0:
             return None
@@ -44,7 +56,7 @@ def descending_query(order):
     )


-def get_index_counts_by_domain_doc_type(es_query_class, domain, extra_filters=None):
+def _get_index_counts_by_domain_doc_type(es_query_class, domain, extra_filters=None):
     """
     :param es_query_class: Subclass of ``HQESQuery``
     :param domain: Domain name to filter on
@@ -155,11 +167,12 @@ def get_primary_db_form_ids(domain, doc_type, startdate, enddate):
     return sql_ids


-def get_es_case_ids(domain, doc_type, startdate, enddate):
+@experiment
+def get_es_case_ids(domain, doc_type, startdate, enddate, escls=es.CaseES):
     datefilter = None
     if startdate or enddate:
         datefilter = es.cases.server_modified_range(gte=startdate, lt=enddate)
-    return _get_es_doc_ids(es.CaseES, domain, doc_type, datefilter)
+    return _get_es_doc_ids(escls, domain, doc_type, datefilter)


 def get_es_form_ids(domain, doc_type, startdate, enddate):
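
Call sites in the diff above are unchanged: the decorator merges
`old_args`/`new_args` into the caller's keyword arguments, so `escls` keeps
its `es.CaseES` default on the old path and becomes `es.CaseSearchES` on the
new path. Conceptually (a simplified sketch, not the framework's actual
wrapper, which also times both calls and compares results):

```python
def run_both(func, args, kwargs, old_args, new_args):
    # old path: old_args is empty here, so escls defaults to es.CaseES
    old_result = func(*args, **kwargs, **old_args)
    # new path: new_args injects escls=es.CaseSearchES
    new_result = func(*args, **kwargs, **new_args)
    return old_result, new_result
```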
1 change: 1 addition & 0 deletions corehq/apps/domain/tests/test_deletion_models.py
@@ -56,6 +56,7 @@
     'domain.SuperuserProjectEntryRecord',
     'dropbox.DropboxUploadHelper',
     'export.DefaultExportSettings',
+    'experiments.ExperimentEnabler',
     'field_audit.AuditEvent',
     'fixtures.UserLookupTableStatus',
     'fixtures.LookupTableRow',  # handled by cascading delete
1 change: 1 addition & 0 deletions corehq/apps/dump_reload/tests/test_dump_models.py
@@ -66,6 +66,7 @@
     "dropbox.DropboxUploadHelper",
     "enterprise.EnterpriseMobileWorkerSettings",  # tied to an account, not a domain
     "enterprise.EnterprisePermissions",
+    "experiments.ExperimentEnabler",
     "export.DefaultExportSettings",  # tied to an account, not a domain
     "export.EmailExportWhenDoneRequest",  # transient model tied to an export task
     "form_processor.DeprecatedXFormAttachmentSQL",
44 changes: 44 additions & 0 deletions corehq/apps/experiments/README.md
@@ -0,0 +1,44 @@
# Experiments Framework for Comparing Old and New Code Paths

The Experiments framework compares the performance and results of old and new
code paths. This is useful for testing a new implementation against the
existing one to ensure it behaves as expected before it is adopted.

Inspired by [GitHub Scientist](https://github.blog/developer-skills/application-development/scientist/).

## Features

- **Performance Recording**: The framework records execution times and sends
`commcare.experiment` metrics to Datadog. When metrics are enabled for only
the old or new code path, timing of the respective code path is recorded along
with an `enabled` tag value of `old` or `new`. When `both` are enabled, an
additional "diff" metric is recorded with the duration of the new code path as
a percentage of the old code path time.
- **Result Comparison**: It compares the results of the old and new code paths
and logs unexpected differences to Sentry. The comparison can be customized
by passing an `is_equal` callable to the experiment decorator.
- **Experiment Management**: Experiments can be enabled or disabled using
`ExperimentEnabler` records in Django Admin. All experiments are disabled by
default, meaning only the old code path is run until `enabled_percent` is set
to a value greater than zero. See `enabled_percent` docs for more details on
fine-grained experiment control. Enabled states are cached, so changes in
Django Admin may not apply immediately. A sketch of enabling an experiment
follows this list.
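
For instance, enabling an experiment might look like this (a sketch: the
`ExperimentEnabler` fields come from this PR's model, but the `path` value is
illustrative):

```python
from corehq.apps.experiments.models import ExperimentEnabler

# Run the new code path (in addition to the old) on ~50% of calls.
# enabled_percent: 0 = old only (default), -1 = old only without metrics,
# 1-100 = % of calls that also run new, 101 = new only, 102 = new only
# without metrics.
ExperimentEnabler.objects.update_or_create(
    campaign="rm-hqcases",
    path="corehq.apps.data_pipeline_audit.dbacessors.get_es_case_counts",
    defaults={"enabled_percent": 50},
)
```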

## Usage

To define an experiment, use the `Experiment` class to create a decorator for
the function you want to test (`MOON_SHOT` here is an illustrative campaign
constant):

```python
from corehq.apps.experiments import Experiment, MOON_SHOT

experiment = Experiment(
    campaign=MOON_SHOT,
    old_args={"feature": "valley"},
    new_args={"feature": "crater"},
)

@experiment
def function(other, args, *, feature):
    ...  # implementation
```
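
The `old_args`/`new_args` keywords are merged into the caller's arguments, so
call sites do not change. The result comparison can also be customized per
function; this sketch mirrors the `is_equal` usage in this PR (the function
name is hypothetical), treating results within a small tolerance as equal:

```python
@experiment(is_equal=lambda a, b: abs(a - b) < 5)
def count_craters(domain, *, feature):
    ...  # counts may differ slightly between code paths
```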
4 changes: 4 additions & 0 deletions corehq/apps/experiments/__init__.py
@@ -0,0 +1,4 @@
from .experiment import Experiment # noqa: F401

# campaigns
RM_HQCASES = 'rm-hqcases'
9 changes: 9 additions & 0 deletions corehq/apps/experiments/admin.py
@@ -0,0 +1,9 @@
from django.contrib import admin

from .models import ExperimentEnabler


@admin.register(ExperimentEnabler)
class ExperimentEnablerAdmin(admin.ModelAdmin):
    list_display = ('campaign', 'path', 'enabled_percent')
    ordering = ('campaign', 'path')
168 changes: 168 additions & 0 deletions corehq/apps/experiments/experiment.py
@@ -0,0 +1,168 @@
import time
import warnings
from functools import wraps
from operator import eq

from attrs import asdict, define, field

from dimagi.utils.logging import notify_error, notify_exception

from corehq.util.metrics import metrics_histogram


@define
class Experiment:
    """Experiment decorator factory for comparing old and new code paths.

    :param campaign: Experiment campaign name for a group of experiments.
        Should be defined as a constant in `corehq/apps/experiments/__init__.py`.
    :param old_args: A dict of keyword arguments to invoke the old code path.
    :param new_args: A dict of keyword arguments to invoke the new code path.
    :param is_equal: A callable that takes the old and new results and
        returns whether they should be considered equal. Default: `operator.eq`.
    :param time_buckets: A tuple of histogram buckets for the old code
        path timer.
    :param percent_buckets: A tuple of histogram buckets for the new
        code path time as a percentage of the old code path time. For
        example, if the old code path takes 1 second and the new code
        path takes 2 seconds, the percentage is 2 / 1 or 200%.
    :param ignore_duplicate: If true, suppress the warning about a duplicate
        experiment. Default: false.
    """
    campaign = field()
    old_args = field(factory=dict)
    new_args = field(factory=dict)
    is_equal = field(default=eq)
    time_buckets = field(default=(0.01, 0.1, 1, 10, 100))
    percent_buckets = field(default=(50, 95, 105, 200))
    ignore_duplicate = field(default=False)
    path = field(default="")

    def __call__(self, func=None, /, **kw):
        experiment = type(self)(**(asdict(self) | kw))
        # Reviewer comment (Contributor): til - python 3.9 introduced the
        # ``|`` operator to merge two dicts 🤯

        if func is None:
            return experiment
        experiment.path = f"{func.__module__}.{func.__qualname__}"
        return experiment.decorate(func)

    @property
    def tags(self):
        return {"campaign": self.campaign, "path": self.path}

    def decorate(self, func):
        from .models import is_enabled, should_record_metrics

        @wraps(func)
        def wrapper(*args, **kwargs):
            enabled = is_enabled(self.campaign, self.path)
            metrics_enabled = enabled or should_record_metrics(self.campaign, self.path)
            start = time.time()
            old_result, new_result, old_error, new_error, mid = self._run(enabled, func, args, kwargs)

            if metrics_enabled:
                end = time.time()
                old_time = mid - start
                metrics_histogram(
                    "commcare.experiment.time", old_time, tags=self.tags | ENABLED_TAG[enabled],
                    bucket_tag='duration', buckets=self.time_buckets, bucket_unit='s',
                )
            if not enabled:
                if old_error is not None:
                    raise old_error
                return old_result

            new_time = end - mid
            diff_pct = (new_time / old_time * 100) if old_time else over_the_top
            tags = {"notify": "none"}
            try:
                self._compare(
                    old_result, new_result,
                    old_error, new_error,
                    func, args, kwargs, tags,
                )
            finally:
                metrics_histogram(
                    "commcare.experiment.diff", diff_pct, tags=self.tags | tags,
                    bucket_tag='duration', buckets=self.percent_buckets, bucket_unit='%',
                )
            return old_result

        self._warn_on_duplicate()
        over_the_top = self.percent_buckets[-1] + 1
        wrapper.experiment = self
        return wrapper

    def _run(self, enabled, func, args, kwargs):
        # Tri-state ``enabled``: True runs both code paths, False runs only
        # the old path, None runs only the new path (see ENABLED_TAG below).
        old_result = new_result = old_error = new_error = None
        if enabled is not None:
            try:
                old_result = func(*args, **kwargs, **self.old_args)
            except Exception as err:
                old_error = err
        mid = time.time()
        if enabled or enabled is None:
            try:
                new_result = func(*args, **kwargs, **self.new_args)
            except Exception as err:
                new_error = err
            if enabled is None:
                # only the new code path ran; adopt its timing and outcome
                mid = time.time()
                if new_error is None:
                    old_result = new_result
                else:
                    old_error = new_error
        return old_result, new_result, old_error, new_error, mid

    def _compare(self, old_result, new_result, old_error, new_error, func, args, kwargs, tags):
        if old_error is not None:
            if new_error is None:
                new_repr = repr(new_result)
            elif type(new_error) is type(old_error) and new_error.args == old_error.args:
                raise old_error
            else:
                new_repr = f"raised {describe(type(new_error), new_error.args, {})}"
            old_repr = describe(type(old_error), old_error.args, {})
            notify_exception(
                None, f"{describe(func, args, kwargs)}: raised {old_repr} != {new_repr}",
                details=self.tags, exec_info=new_error or old_error,
            )
            tags["notify"] = "error"
            raise old_error
        if new_error is not None:
            notify_exception(None, "new code path failed in experiment",
                             details=self.tags, exec_info=new_error)
            tags["notify"] = "error"
        else:
            try:
                equal = self.is_equal(old_result, new_result)
            except Exception as err:
                notify_exception(None, "is_equal failed in experiment",
                                 details=self.tags, exec_info=err)
                equal = False
            if not equal:
                tags["notify"] = "diff"
                notify_error(
                    f"{describe(func, args, kwargs)}: {old_result!r} != {new_result!r}",
                    details=self.tags,
                )

    def _warn_on_duplicate(self):
        key = self.campaign, self.path
        if key in _all_experiments and not self.ignore_duplicate:
            warnings.warn(f"Duplicate experiment {key} will result in "
                          "conflated metrics from multiple experiments",
                          stacklevel=4)
        _all_experiments.add(key)


def describe(func, args, kwargs, vlen=20):
    def rep(v):
        rval = repr(v)
        return (rval[:vlen] + "...") if len(rval) > vlen else rval
    argv = [rep(a) for a in args] + [f"{k}={rep(v)}" for k, v in kwargs.items()]
    return f"{func.__name__}({', '.join(argv)})"


# Maps the tri-state "enabled" value to the metric tag for the paths that ran.
ENABLED_TAG = {
    True: {"enabled": "both"},
    False: {"enabled": "old"},
    None: {"enabled": "new"},
}
_all_experiments = set()
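
For reference, the tags on a `commcare.experiment.time` sample combine the
experiment identity (`self.tags`) with the enablement state (`ENABLED_TAG`).
For the `RM_HQCASES` experiment on `get_es_case_counts` with both paths
enabled, that union is:

```python
# self.tags | ENABLED_TAG[True]
{
    "campaign": "rm-hqcases",
    "path": "corehq.apps.data_pipeline_audit.dbacessors.get_es_case_counts",
    "enabled": "both",
}
```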
27 changes: 27 additions & 0 deletions corehq/apps/experiments/migrations/0001_initial.py
@@ -0,0 +1,27 @@
# Generated by Django 4.2.17 on 2025-01-17 13:43

import django.core.validators
from django.db import migrations, models


class Migration(migrations.Migration):

    initial = True

    dependencies = [
    ]

    operations = [
        migrations.CreateModel(
            name='ExperimentEnabler',
            fields=[
                ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                ('campaign', models.CharField(help_text='Identifier for a group of related experiments.', max_length=255)),
                ('path', models.CharField(blank=True, help_text='Example: corehq.apps.experiments.func Partial paths may be specified to match all experiments in a namespace. An empty string matches all experiments.', max_length=1024)),
                ('enabled_percent', models.SmallIntegerField(default=0, help_text='Zero means run only old, -1 to disable metrics as well. 1-100 means % of time to run new. 101 means run only new, 102 to disable metrics as well.', validators=[django.core.validators.MinValueValidator(-1), django.core.validators.MaxValueValidator(102)])),
            ],
            options={
                'unique_together': {('campaign', 'path')},
            },
        ),
    ]
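
The `models.py` that implements `is_enabled` and `should_record_metrics` did
not render in this view. Based on the `enabled_percent` help text above and
the tri-state consumed by `Experiment._run`, the enablement check plausibly
maps values like this (a hypothetical sketch, not the actual implementation):

```python
import random

# Hypothetical sketch: map ExperimentEnabler.enabled_percent to the
# tri-state used by Experiment._run.
#   False -> run only the old code path
#   True  -> run both code paths
#   None  -> run only the new code path
def enabled_state(enabled_percent):
    if enabled_percent >= 101:   # 101/102: new only (102 also mutes metrics)
        return None
    if enabled_percent <= 0:     # 0/-1: old only (-1 also mutes metrics)
        return False
    # 1-100: also run the new path this percent of the time
    return random.random() * 100 < enabled_percent
```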