Skip to content

Commit

Permalink
documentation fix for wiki-corpus license (#210)
Browse files (browse the repository at this point in the history)
  • Loading branch information
seanzhangkx8 authored Feb 13, 2024
1 parent c5b2bd7 commit 73c0d7d
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 17 deletions.
1 change: 0 additions & 1 deletion convokit/expected_context_framework/col_normed_tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def dump(self, dirname):


class ColNormedTfidf(TransformerMixin):

"""
Model that derives tf-idf reweighted representations of utterances,
which are normalized by column. Can be used in ConvoKit through the `ColNormedTfidfTransformer` transformer; see documentation of that transformer for further details.
Expand Down
12 changes: 6 additions & 6 deletions convokit/hyperconvo/hyperconvo.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ def degree_stat_funcs(nan_val):
"norm.max": lambda l: np.max(l) / np.sum(l) if np.sum(l) > 0 else 0,
"2nd-largest": lambda l: int(np.partition(l, -2)[-2]) if len(l) > 1 else nan_val,
"2nd-argmax": lambda l: int((-l).argsort()[1]) if len(l) > 1 else nan_val,
"norm.2nd-largest": lambda l: np.partition(l, -2)[-2] / np.sum(l)
if (len(l) > 1 and np.sum(l) > 0)
else nan_val,
"norm.2nd-largest": lambda l: (
np.partition(l, -2)[-2] / np.sum(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
),
"mean": np.mean,
"mean-nonzero": lambda l: np.mean(l[l != 0]) if len(l[l != 0]) > 0 else 0,
"prop-nonzero": lambda l: np.mean(l != 0),
"prop-multiple": lambda l: np.mean(l[l != 0] > 1) if len(l[l != 0] > 1) > 0 else 0,
"entropy": lambda l: scipy.stats.entropy(l) if np.sum(l) > 0 else nan_val,
"2nd-largest / max": lambda l: np.partition(l, -2)[-2] / np.max(l)
if (len(l) > 1 and np.sum(l) > 0)
else nan_val,
"2nd-largest / max": lambda l: (
np.partition(l, -2)[-2] / np.max(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
),
}


Expand Down
8 changes: 5 additions & 3 deletions convokit/model/corpus_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,11 @@ def dump_utterances(corpus, dir_name, exclude_vectors, fields_to_skip):
KeyMeta: dump_helper_bin(ut.meta, d_bin, fields_to_skip.get("utterance", [])),
KeyReplyTo: ut.reply_to,
KeyTimestamp: ut.timestamp,
KeyVectors: ut.vectors
if exclude_vectors is None
else list(set(ut.vectors) - set(exclude_vectors)),
KeyVectors: (
ut.vectors
if exclude_vectors is None
else list(set(ut.vectors) - set(exclude_vectors))
),
}
json.dump(ut_obj, f)
f.write("\n")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def get_unigrams_and_bigrams(document):


class PolitenessFeatureVectorizer:

"""
Returns document features based on-
- unigrams and bigrams
Expand Down
1 change: 0 additions & 1 deletion convokit/speakerConvoDiversity/speakerConvoDiversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ def compute_speaker_convo_divergence(


class SpeakerConvoDiversityWrapper(Transformer):

"""
Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.
Expand Down
1 change: 0 additions & 1 deletion convokit/speakerConvoDiversity/speakerConvoDiversity2.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ def _set_output(self, corpus, df):


class SpeakerConvoDiversityWrapper(Transformer):

"""
Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.
Expand Down
1 change: 0 additions & 1 deletion convokit/speaker_convo_helpers/speaker_convo_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@


class SpeakerConvoAttrs(Transformer):

"""
Transformer that aggregates statistics per (speaker, convo). e.g., average wordcount of all utterances that speaker contributed per convo. Assumes that `corpus.organize_speaker_convo_history` has already been called.
Expand Down
1 change: 0 additions & 1 deletion convokit/speaker_convo_helpers/speaker_convo_lifestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@


class SpeakerConvoLifestage(Transformer):

"""
Transformer that, for each speaker in a conversation, computes the lifestage of the speaker in that conversation. For instance, if lifestages are 20 conversations long, then the first 20 conversations a speaker participates in will be in lifestage 0, and the second 20 will be in lifestage 1.
Expand Down
4 changes: 2 additions & 2 deletions docs/source/wiki.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ Related links
Data License
^^^^^^^^^^^^

This dataset is governed by the `CC BY license v4.0 <https://creativecommons.org/licenses/by/4.0/>`_. Copyright (C) 2017-2020 The ConvoKit Developers.
This dataset is governed by the `CC BY-SA license v4.0 <https://creativecommons.org/licenses/by-sa/4.0/>`_.


Contact
^^^^^^^

Please email any questions to Cristian Danescu-Niculescu-Mizil (email address obscured by the page's email protection).
Please email any questions to Cristian Danescu-Niculescu-Mizil (email address obscured by the page's email protection).

0 comments on commit 73c0d7d

Please sign in to comment.