Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

documentation fix for wiki-corpus license #210

Merged
Merged 2 commits
Feb 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion convokit/expected_context_framework/col_normed_tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def dump(self, dirname):


class ColNormedTfidf(TransformerMixin):

"""
Model that derives tf-idf reweighted representations of utterances,
which are normalized by column. Can be used in ConvoKit through the `ColNormedTfidfTransformer` transformer; see documentation of that transformer for further details.
Expand Down
12 changes: 6 additions & 6 deletions convokit/hyperconvo/hyperconvo.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,17 @@ def degree_stat_funcs(nan_val):
"norm.max": lambda l: np.max(l) / np.sum(l) if np.sum(l) > 0 else 0,
"2nd-largest": lambda l: int(np.partition(l, -2)[-2]) if len(l) > 1 else nan_val,
"2nd-argmax": lambda l: int((-l).argsort()[1]) if len(l) > 1 else nan_val,
"norm.2nd-largest": lambda l: np.partition(l, -2)[-2] / np.sum(l)
if (len(l) > 1 and np.sum(l) > 0)
else nan_val,
"norm.2nd-largest": lambda l: (
np.partition(l, -2)[-2] / np.sum(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
),
"mean": np.mean,
"mean-nonzero": lambda l: np.mean(l[l != 0]) if len(l[l != 0]) > 0 else 0,
"prop-nonzero": lambda l: np.mean(l != 0),
"prop-multiple": lambda l: np.mean(l[l != 0] > 1) if len(l[l != 0] > 1) > 0 else 0,
"entropy": lambda l: scipy.stats.entropy(l) if np.sum(l) > 0 else nan_val,
"2nd-largest / max": lambda l: np.partition(l, -2)[-2] / np.max(l)
if (len(l) > 1 and np.sum(l) > 0)
else nan_val,
"2nd-largest / max": lambda l: (
np.partition(l, -2)[-2] / np.max(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
),
}


Expand Down
8 changes: 5 additions & 3 deletions convokit/model/corpus_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,9 +577,11 @@ def dump_utterances(corpus, dir_name, exclude_vectors, fields_to_skip):
KeyMeta: dump_helper_bin(ut.meta, d_bin, fields_to_skip.get("utterance", [])),
KeyReplyTo: ut.reply_to,
KeyTimestamp: ut.timestamp,
KeyVectors: ut.vectors
if exclude_vectors is None
else list(set(ut.vectors) - set(exclude_vectors)),
KeyVectors: (
ut.vectors
if exclude_vectors is None
else list(set(ut.vectors) - set(exclude_vectors))
),
}
json.dump(ut_obj, f)
f.write("\n")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def get_unigrams_and_bigrams(document):


class PolitenessFeatureVectorizer:

"""
Returns document features based on:
- unigrams and bigrams
Expand Down
1 change: 0 additions & 1 deletion convokit/speakerConvoDiversity/speakerConvoDiversity.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,6 @@ def compute_speaker_convo_divergence(


class SpeakerConvoDiversityWrapper(Transformer):

"""
Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.

Expand Down
1 change: 0 additions & 1 deletion convokit/speakerConvoDiversity/speakerConvoDiversity2.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,6 @@ def _set_output(self, corpus, df):


class SpeakerConvoDiversityWrapper(Transformer):

"""
Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.

Expand Down
1 change: 0 additions & 1 deletion convokit/speaker_convo_helpers/speaker_convo_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@


class SpeakerConvoAttrs(Transformer):

"""
Transformer that aggregates statistics per (speaker, convo). e.g., average wordcount of all utterances that speaker contributed per convo. Assumes that `corpus.organize_speaker_convo_history` has already been called.

Expand Down
1 change: 0 additions & 1 deletion convokit/speaker_convo_helpers/speaker_convo_lifestage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@


class SpeakerConvoLifestage(Transformer):

"""
Transformer that, for each speaker in a conversation, computes the lifestage of the speaker in that conversation. For instance, if lifestages are 20 conversations long, then the first 20 conversations a speaker participates in will be in lifestage 0, and the second 20 will be in lifestage 1.

Expand Down
4 changes: 2 additions & 2 deletions docs/source/wiki.rst
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ Related links
Data License
^^^^^^^^^^^^

This dataset is governed by the `CC BY license v4.0 <https://creativecommons.org/licenses/by/4.0/>`_. Copyright (C) 2017-2020 The ConvoKit Developers.
This dataset is governed by the `CC BY-SA license v4.0 <https://creativecommons.org/licenses/by-sa/4.0/>`_.


Contact
^^^^^^^

Please email any questions to: [email protected] (Cristian Danescu-Niculescu-Mizil)
Please email any questions to: [email protected] (Cristian Danescu-Niculescu-Mizil)
Loading