From 963c16eb06293bd54f0e50f4bc9a108456a92b9e Mon Sep 17 00:00:00 2001
From: seanzhangkx8 <106214464+seanzhangkx8@users.noreply.github.com>
Date: Mon, 12 Feb 2024 20:37:57 -0500
Subject: [PATCH 1/2] wiki corpus license documentation fix
---
docs/source/wiki.rst | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/docs/source/wiki.rst b/docs/source/wiki.rst
index f4f7123e..22ed6db9 100644
--- a/docs/source/wiki.rst
+++ b/docs/source/wiki.rst
@@ -66,10 +66,10 @@ Related links
Data License
^^^^^^^^^^^^
-This dataset is governed by the `CC BY license v4.0 <https://creativecommons.org/licenses/by/4.0/>`_. Copyright (C) 2017-2020 The ConvoKit Developers.
+This dataset is governed by the `CC BY-SA license v4.0 <https://creativecommons.org/licenses/by-sa/4.0/>`_.
Contact
^^^^^^^
-Please email any questions to: cristian@cs.cornell.edu (Cristian Danescu-Niculescu-Mizil)
\ No newline at end of file
+Please email any questions to: cristian@cs.cornell.edu (Cristian Danescu-Niculescu-Mizil)
From 738f4bda4c509e9b6f265b06865094d1eb7fe853 Mon Sep 17 00:00:00 2001
From: seanzhangkx8 <106214464+seanzhangkx8@users.noreply.github.com>
Date: Mon, 12 Feb 2024 20:42:21 -0500
Subject: [PATCH 2/2] run black formatter
---
.../expected_context_framework/col_normed_tfidf.py | 1 -
convokit/hyperconvo/hyperconvo.py | 12 ++++++------
convokit/model/corpus_helpers.py | 8 +++++---
.../politeness_api/features/vectorizer.py | 1 -
.../speakerConvoDiversity/speakerConvoDiversity.py | 1 -
.../speakerConvoDiversity/speakerConvoDiversity2.py | 1 -
.../speaker_convo_helpers/speaker_convo_attrs.py | 1 -
.../speaker_convo_helpers/speaker_convo_lifestage.py | 1 -
8 files changed, 11 insertions(+), 15 deletions(-)
diff --git a/convokit/expected_context_framework/col_normed_tfidf.py b/convokit/expected_context_framework/col_normed_tfidf.py
index 3aed04a8..bcdb645e 100644
--- a/convokit/expected_context_framework/col_normed_tfidf.py
+++ b/convokit/expected_context_framework/col_normed_tfidf.py
@@ -115,7 +115,6 @@ def dump(self, dirname):
class ColNormedTfidf(TransformerMixin):
-
"""
Model that derives tf-idf reweighted representations of utterances,
which are normalized by column. Can be used in ConvoKit through the `ColNormedTfidfTransformer` transformer; see documentation of that transformer for further details.
diff --git a/convokit/hyperconvo/hyperconvo.py b/convokit/hyperconvo/hyperconvo.py
index 94f8c44a..25af4ca7 100644
--- a/convokit/hyperconvo/hyperconvo.py
+++ b/convokit/hyperconvo/hyperconvo.py
@@ -18,17 +18,17 @@ def degree_stat_funcs(nan_val):
"norm.max": lambda l: np.max(l) / np.sum(l) if np.sum(l) > 0 else 0,
"2nd-largest": lambda l: int(np.partition(l, -2)[-2]) if len(l) > 1 else nan_val,
"2nd-argmax": lambda l: int((-l).argsort()[1]) if len(l) > 1 else nan_val,
- "norm.2nd-largest": lambda l: np.partition(l, -2)[-2] / np.sum(l)
- if (len(l) > 1 and np.sum(l) > 0)
- else nan_val,
+ "norm.2nd-largest": lambda l: (
+ np.partition(l, -2)[-2] / np.sum(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
+ ),
"mean": np.mean,
"mean-nonzero": lambda l: np.mean(l[l != 0]) if len(l[l != 0]) > 0 else 0,
"prop-nonzero": lambda l: np.mean(l != 0),
"prop-multiple": lambda l: np.mean(l[l != 0] > 1) if len(l[l != 0] > 1) > 0 else 0,
"entropy": lambda l: scipy.stats.entropy(l) if np.sum(l) > 0 else nan_val,
- "2nd-largest / max": lambda l: np.partition(l, -2)[-2] / np.max(l)
- if (len(l) > 1 and np.sum(l) > 0)
- else nan_val,
+ "2nd-largest / max": lambda l: (
+ np.partition(l, -2)[-2] / np.max(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val
+ ),
}
diff --git a/convokit/model/corpus_helpers.py b/convokit/model/corpus_helpers.py
index 62a90aee..ff680ceb 100644
--- a/convokit/model/corpus_helpers.py
+++ b/convokit/model/corpus_helpers.py
@@ -577,9 +577,11 @@ def dump_utterances(corpus, dir_name, exclude_vectors, fields_to_skip):
KeyMeta: dump_helper_bin(ut.meta, d_bin, fields_to_skip.get("utterance", [])),
KeyReplyTo: ut.reply_to,
KeyTimestamp: ut.timestamp,
- KeyVectors: ut.vectors
- if exclude_vectors is None
- else list(set(ut.vectors) - set(exclude_vectors)),
+ KeyVectors: (
+ ut.vectors
+ if exclude_vectors is None
+ else list(set(ut.vectors) - set(exclude_vectors))
+ ),
}
json.dump(ut_obj, f)
f.write("\n")
diff --git a/convokit/politeness_collections/politeness_api/features/vectorizer.py b/convokit/politeness_collections/politeness_api/features/vectorizer.py
index 0d7626cf..ac6bbe5d 100644
--- a/convokit/politeness_collections/politeness_api/features/vectorizer.py
+++ b/convokit/politeness_collections/politeness_api/features/vectorizer.py
@@ -37,7 +37,6 @@ def get_unigrams_and_bigrams(document):
class PolitenessFeatureVectorizer:
-
"""
Returns document features based on-
- unigrams and bigrams
diff --git a/convokit/speakerConvoDiversity/speakerConvoDiversity.py b/convokit/speakerConvoDiversity/speakerConvoDiversity.py
index fc47f1be..d5e4a8c4 100644
--- a/convokit/speakerConvoDiversity/speakerConvoDiversity.py
+++ b/convokit/speakerConvoDiversity/speakerConvoDiversity.py
@@ -215,7 +215,6 @@ def compute_speaker_convo_divergence(
class SpeakerConvoDiversityWrapper(Transformer):
-
"""
Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.
diff --git a/convokit/speakerConvoDiversity/speakerConvoDiversity2.py b/convokit/speakerConvoDiversity/speakerConvoDiversity2.py
index cc1b077c..ffd42c1e 100644
--- a/convokit/speakerConvoDiversity/speakerConvoDiversity2.py
+++ b/convokit/speakerConvoDiversity/speakerConvoDiversity2.py
@@ -208,7 +208,6 @@ def _set_output(self, corpus, df):
class SpeakerConvoDiversityWrapper(Transformer):
-
"""
Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`.
diff --git a/convokit/speaker_convo_helpers/speaker_convo_attrs.py b/convokit/speaker_convo_helpers/speaker_convo_attrs.py
index eb46d82c..8381ddcd 100644
--- a/convokit/speaker_convo_helpers/speaker_convo_attrs.py
+++ b/convokit/speaker_convo_helpers/speaker_convo_attrs.py
@@ -3,7 +3,6 @@
class SpeakerConvoAttrs(Transformer):
-
"""
Transformer that aggregates statistics per (speaker, convo). e.g., average wordcount of all utterances that speaker contributed per convo. Assumes that `corpus.organize_speaker_convo_history` has already been called.
diff --git a/convokit/speaker_convo_helpers/speaker_convo_lifestage.py b/convokit/speaker_convo_helpers/speaker_convo_lifestage.py
index fd43832b..eb5b5ec4 100644
--- a/convokit/speaker_convo_helpers/speaker_convo_lifestage.py
+++ b/convokit/speaker_convo_helpers/speaker_convo_lifestage.py
@@ -2,7 +2,6 @@
class SpeakerConvoLifestage(Transformer):
-
"""
Transformer that, for each speaker in a conversation, computes the lifestage of the speaker in that conversation. For instance, if lifestages are 20 conversations long, then the first 20 conversations a speaker participates in will be in lifestage 0, and the second 20 will be in lifestage 1.