documentation fix for wiki-corpus license (#210)

CornellNLP · Feb 13, 2024 · 73c0d7d
1 parent c5b2bd7
commit 73c0d7d
Show file tree

Hide file tree

Showing 9 changed files with 13 additions and 17 deletions.
diff --git a/convokit/expected_context_framework/col_normed_tfidf.py b/convokit/expected_context_framework/col_normed_tfidf.py
@@ -115,7 +115,6 @@ def dump(self, dirname):
 
 
 class ColNormedTfidf(TransformerMixin):
-
     """
     Model that derives tf-idf reweighted representations of utterances,
     which are normalized by column. Can be used in ConvoKit through the `ColNormedTfidfTransformer` transformer; see documentation of that transformer for further details.

diff --git a/convokit/hyperconvo/hyperconvo.py b/convokit/hyperconvo/hyperconvo.py
@@ -18,17 +18,17 @@ def degree_stat_funcs(nan_val):
         "norm.max": lambda l: np.max(l) / np.sum(l) if np.sum(l) > 0 else 0,
         "2nd-largest": lambda l: int(np.partition(l, -2)[-2]) if len(l) > 1 else nan_val,
         "2nd-argmax": lambda l: int((-l).argsort()[1]) if len(l) > 1 else nan_val,
-        "norm.2nd-largest": lambda l: np.partition(l, -2)[-2] / np.sum(l)
-        if (len(l) > 1 and np.sum(l) > 0)
-        else nan_val,
+        "norm.2nd-largest": lambda l: (
+            np.partition(l, -2)[-2] / np.sum(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
+        ),
         "mean": np.mean,
         "mean-nonzero": lambda l: np.mean(l[l != 0]) if len(l[l != 0]) > 0 else 0,
         "prop-nonzero": lambda l: np.mean(l != 0),
         "prop-multiple": lambda l: np.mean(l[l != 0] > 1) if len(l[l != 0] > 1) > 0 else 0,
         "entropy": lambda l: scipy.stats.entropy(l) if np.sum(l) > 0 else nan_val,
-        "2nd-largest / max": lambda l: np.partition(l, -2)[-2] / np.max(l)
-        if (len(l) > 1 and np.sum(l) > 0)
-        else nan_val,
+        "2nd-largest / max": lambda l: (
+            np.partition(l, -2)[-2] / np.max(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
+        ),
     }
 
 

diff --git a/convokit/model/corpus_helpers.py b/convokit/model/corpus_helpers.py
@@ -577,9 +577,11 @@ def dump_utterances(corpus, dir_name, exclude_vectors, fields_to_skip):
                 KeyMeta: dump_helper_bin(ut.meta, d_bin, fields_to_skip.get("utterance", [])),
                 KeyReplyTo: ut.reply_to,
                 KeyTimestamp: ut.timestamp,
-                KeyVectors: ut.vectors
-                if exclude_vectors is None
-                else list(set(ut.vectors) - set(exclude_vectors)),
+                KeyVectors: (
+                    ut.vectors
+                    if exclude_vectors is None
+                    else list(set(ut.vectors) - set(exclude_vectors))
+                ),
             }
             json.dump(ut_obj, f)
             f.write("\n")

diff --git a/convokit/politeness_collections/politeness_api/features/vectorizer.py b/convokit/politeness_collections/politeness_api/features/vectorizer.py
@@ -37,7 +37,6 @@ def get_unigrams_and_bigrams(document):
 
 
 class PolitenessFeatureVectorizer:
-
     """
     Returns document features based on-
         - unigrams and bigrams

diff --git a/convokit/speakerConvoDiversity/speakerConvoDiversity.py b/convokit/speakerConvoDiversity/speakerConvoDiversity.py
@@ -215,7 +215,6 @@ def compute_speaker_convo_divergence(
 
 
 class SpeakerConvoDiversityWrapper(Transformer):
-
     """
     Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.
 

diff --git a/convokit/speakerConvoDiversity/speakerConvoDiversity2.py b/convokit/speakerConvoDiversity/speakerConvoDiversity2.py
@@ -208,7 +208,6 @@ def _set_output(self, corpus, df):
 
 
 class SpeakerConvoDiversityWrapper(Transformer):
-
     """
     Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.
 

diff --git a/convokit/speaker_convo_helpers/speaker_convo_attrs.py b/convokit/speaker_convo_helpers/speaker_convo_attrs.py
@@ -3,7 +3,6 @@
 
 
 class SpeakerConvoAttrs(Transformer):
-
     """
     Transformer that aggregates statistics per (speaker, convo). e.g., average wordcount of all utterances that speaker contributed per convo. Assumes that `corpus.organize_speaker_convo_history` has already been called.
 

diff --git a/convokit/speaker_convo_helpers/speaker_convo_lifestage.py b/convokit/speaker_convo_helpers/speaker_convo_lifestage.py
@@ -2,7 +2,6 @@
 
 
 class SpeakerConvoLifestage(Transformer):
-
     """
     Transformer that, for each speaker in a conversation, computes the lifestage of the speaker in that conversation. For instance, if lifestages are 20 conversations long, then the first 20 conversations a speaker participates in will be in lifestage 0, and the second 20 will be in lifestage 1.
 

diff --git a/docs/source/wiki.rst b/docs/source/wiki.rst
@@ -66,10 +66,10 @@ Related links
 Data License
 ^^^^^^^^^^^^
 
-This dataset is governed by the `CC BY license v4.0 <https://creativecommons.org/licenses/by/4.0/>`_. Copyright (C) 2017-2020 The ConvoKit Developers.
+This dataset is governed by the `CC BY-SA license v4.0 <https://creativecommons.org/licenses/by-sa/4.0/>`_.
 
 
 Contact
 ^^^^^^^
 
-Please email any questions to: [email protected] (Cristian Danescu-Niculescu-Mizil)
+Please email any questions to: [email protected] (Cristian Danescu-Niculescu-Mizil)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -115,7 +115,6 @@ def dump(self, dirname):
     class ColNormedTfidf(TransformerMixin):
         """
         Model that derives tf-idf reweighted representations of utterances,
         which are normalized by column. Can be used in ConvoKit through the `ColNormedTfidfTransformer` transformer; see documentation of that transformer for further details.
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -215,7 +215,6 @@ def compute_speaker_convo_divergence(


		class SpeakerConvoDiversityWrapper(Transformer):

		"""
		Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -208,7 +208,6 @@ def _set_output(self, corpus, df):


		class SpeakerConvoDiversityWrapper(Transformer):

		"""
		Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -3,7 +3,6 @@


		class SpeakerConvoAttrs(Transformer):

		"""
		Transformer that aggregates statistics per (speaker, convo). e.g., average wordcount of all utterances that speaker contributed per convo. Assumes that `corpus.organize_speaker_convo_history` has already been called.

Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,7 +2,6 @@


		class SpeakerConvoLifestage(Transformer):

		"""
		Transformer that, for each speaker in a conversation, computes the lifestage of the speaker in that conversation. For instance, if lifestages are 20 conversations long, then the first 20 conversations a speaker participates in will be in lifestage 0, and the second 20 will be in lifestage 1.

Expand Down