Skip to content

Commit

Permalink
Add contributions by contributor metric and tests
Browse files Browse the repository at this point in the history
Signed-off-by: Daniel Izquierdo Cortazar <[email protected]>
  • Loading branch information
dicortazar committed Oct 14, 2020
1 parent 61340c5 commit 639fe8c
Show file tree
Hide file tree
Showing 4 changed files with 366 additions and 0 deletions.
66 changes: 66 additions & 0 deletions broomstick/data/es/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,16 @@ def exclude_org(s, org_name):
return s.exclude('term', author_org_name=org_name)


def ignore_bots(s):
    """Exclude bot-authored items from a search.

    :param s: the search object the exclusion is applied to.

    :returns: the value returned by `s.exclude`, i.e. the search with
        the bot exclusion filter set.
    """
    # NOTE(review): the query type used here is 'bool', whereas the
    # sibling `exclude_org` helper passes 'term' for its field match.
    # Confirm that 'bool' is the intended query type for `author_bot`.
    bot_criteria = {'author_bot': True}

    return s.exclude('bool', **bot_criteria)


def filter_org(s, org_name):
"""Adds a filter for retrieving only authors affiliated to the given org.
Expand Down Expand Up @@ -274,3 +284,59 @@ def contributions_count_by_org(data_source,
inplace=True)

return contribs_by_org_df


def contributions_count_by_contributor(data_source,
                                       start_date,
                                       end_date=None,
                                       exclude_bots=True):
    """Gets the number of contributions of each contributor.

    Builds a search for the given data source and dates, groups the
    matching items by `author_name` and, for each author, counts the
    distinct values of the id field configured for the data source in
    `DS_ID_FIELD`. The count comes from an Elasticsearch `cardinality`
    aggregation, so it is an approximate distinct count.

    At most 10000 contributors are returned, sorted by descending number
    of contributions.

    :param data_source: `broomstick.core.DataSource`
    :param start_date: date from which we want to start counting
        contributions (exclusive).
    :param end_date: date until which we want to count contributions
        (inclusive). `None` by default, meaning count everything from
        `start_date`.
    :param exclude_bots: whether or not to exclude contributions sent by
        bots.

    :returns: a Pandas DataFrame with two columns:
        - `contributor`: contributor name.
        - `contributions`: the number of contributions sent by that
          contributor to the specified data source during the given
          dates.
        When no contributor matches, an empty DataFrame with those same
        two columns is returned.
    """
    s = create_search(data_source=data_source,
                      start_date=start_date,
                      end_date=end_date)

    if exclude_bots:
        s = ignore_bots(s=s)

    # One bucket per author; each bucket carries the distinct count of
    # the data source id field, which is also the sort key.
    s.aggs.bucket('contributors',
                  'terms',
                  field='author_name',
                  order={'total_contribs': 'desc'},
                  size=10000)\
        .metric('total_contribs',
                'cardinality',
                field=DS_ID_FIELD[data_source],
                precision_threshold=40000)

    # Only the aggregation buckets are read below, so slice the search
    # down to an empty range of hits.
    s = s[0:0]

    buckets = s.execute().to_dict()['aggregations']['contributors']['buckets']

    # `json_normalize([])` produces a frame without any column, which
    # would make the `doc_count` drop below fail with a `KeyError`.
    # Return a well-formed empty result instead.
    if not buckets:
        return pandas.DataFrame(columns=['contributor', 'contributions'])

    contribs_by_contributor_df = pandas.json_normalize(buckets)

    # `doc_count` is the raw number of matching documents, not the
    # number of distinct contributions, so it is not part of the result.
    contribs_by_contributor_df = contribs_by_contributor_df.drop(
        columns=['doc_count'])

    contribs_by_contributor_df = contribs_by_contributor_df.rename(
        columns={
            'key': 'contributor',
            'total_contribs.value': 'contributions'})

    return contribs_by_contributor_df



23 changes: 23 additions & 0 deletions broomstick/metrics/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,26 @@ def contributions_count_by_org(data_source,
start_date=start_date,
end_date=end_date,
exclude_unknown=exclude_unknown)


def contributions_count_by_contributor(data_source,
                                       start_date,
                                       end_date=None,
                                       exclude_bots=True):
    """Gets the number of contributions of each contributor.

    Thin wrapper: every argument is forwarded, by keyword and unchanged,
    to `com.contributions_count_by_contributor`.

    :param data_source: `broomstick.core.DataSource`
    :param start_date: date from which we want to start counting
        contributions (exclusive).
    :param end_date: date until which we want to count contributions
        (inclusive). `None` by default, meaning count everything from
        `start_date`.
    :param exclude_bots: whether or not to exclude contributions by bots.

    :returns: the result of `com.contributions_count_by_contributor`,
        returned as is.
    """
    query_args = {
        'data_source': data_source,
        'start_date': start_date,
        'end_date': end_date,
        'exclude_bots': exclude_bots,
    }

    return com.contributions_count_by_contributor(**query_args)

192 changes: 192 additions & 0 deletions test/test_es_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,21 @@ def test_exclude_org(self):
author_org_name=esc.UNKNOWN_ORG_NAME)
self.assertEqual(result, 'test')

def test_ignore_bots(self):
    """Test that the bot exclusion filter is added to the search.

    `Search.exclude` is replaced by a mock so that both the arguments
    it receives and the propagation of its return value can be checked.
    """
    exclude_mock = MagicMock(return_value='test')
    search = Search()
    search.exclude = exclude_mock

    filtered = esc.ignore_bots(search)

    # The helper must ask for the bot exclusion...
    exclude_mock.assert_called_with('bool', author_bot=True)
    # ...and hand back whatever `exclude` produced.
    self.assertEqual(filtered, 'test')


def test_filter_org(self):
"""Tests add organization name inclusion filter.
"""
Expand Down Expand Up @@ -587,6 +602,183 @@ def test_contributions_count_by_org(self,

assert_frame_equal(result, expected_df)

@mock.patch('broomstick.data.es.common.create_search')
@mock.patch('broomstick.data.es.common.ignore_bots')
def test_contributions_count_by_contributor(self,
                                            ignore_bots_mock,
                                            create_search_mock):
    """Test count total contributions by contributor method.

    The same mocked response is served for four scenarios, which differ
    in the data source, in whether an end date is given and in whether
    bots are excluded. For each one the test checks how the search was
    created, whether `ignore_bots` was used, how the aggregation was
    defined and which DataFrame was returned.
    """
    # (contributor, doc_count, distinct contributions) for each bucket
    rows = [('Anne', 213, 179), ('Bob', 130, 125), ('Carl', 33, 30)]

    response = {
        'aggregations': {
            'contributors': {
                'doc_count_error_upper_bound': 0,
                'sum_other_doc_count': 0,
                'buckets': [
                    {
                        'key': name,
                        'doc_count': doc_count,
                        'total_contribs': {
                            'value': contribs
                        }
                    }
                    for name, doc_count, contribs in rows
                ]
            }
        }
    }

    expected_df = pandas.DataFrame(
        {
            'contributor': [name for name, _, _ in rows],
            'contributions': [contribs for _, _, contribs in rows]
        },
        columns=['contributor', 'contributions'])

    # Create a mocked Search
    s, _ = self.__create_mocked_search(response, create_search_mock)

    # Mock `ignore_bots` to return our mocked `Search` object
    ignore_bots_mock.return_value = s

    start_date = '2018-01-01'
    end_date = '2020-01-01'

    # Each scenario: (data source, keyword arguments of the call,
    # end date expected by `create_search`, whether bots are excluded,
    # id field expected in the cardinality metric)
    scenarios = [
        # Test with start and end dates
        (DataSource.GIT,
         {'start_date': start_date, 'end_date': end_date},
         end_date, True, 'hash'),
        # Test with start date only
        (DataSource.ALL,
         {'start_date': start_date},
         None, True, 'painless_unique_id'),
        # Test (not) ignoring bots and both dates
        (DataSource.GIT,
         {'start_date': start_date,
          'end_date': end_date,
          'exclude_bots': False},
         end_date, False, 'hash'),
        # Test (not) ignoring bots and start date only
        (DataSource.GIT,
         {'start_date': start_date, 'exclude_bots': False},
         None, False, 'hash'),
    ]

    for (data_source, call_kwargs, expected_end_date,
         bots_excluded, id_field) in scenarios:

        if not bots_excluded:
            # Reset the `ignore_bots_mock` call number, so calls made by
            # previous scenarios do not count
            ignore_bots_mock.reset_mock()

        result = esc.contributions_count_by_contributor(
            data_source,
            **call_kwargs)

        create_search_mock.assert_called_with(
            data_source=data_source,
            start_date=start_date,
            end_date=expected_end_date)

        if bots_excluded:
            ignore_bots_mock.assert_called_with(
                s=s)
        else:
            ignore_bots_mock.assert_not_called()

        s.aggs.bucket.assert_called_with(
            'contributors',
            'terms',
            field='author_name',
            order={'total_contribs': 'desc'},
            size=10000)
        s.aggs.bucket().metric.assert_called_with(
            'total_contribs',
            'cardinality',
            field=id_field,
            precision_threshold=40000)

        assert_frame_equal(result, expected_df)

def __create_mocked_search(self, response, create_search_mock):
# Create a mocked Search
s = MagicMock()
Expand Down
Loading

0 comments on commit 639fe8c

Please sign in to comment.