From 963c16eb06293bd54f0e50f4bc9a108456a92b9e Mon Sep 17 00:00:00 2001 From: seanzhangkx8 <106214464+seanzhangkx8@users.noreply.github.com> Date: Mon, 12 Feb 2024 20:37:57 -0500 Subject: [PATCH 1/2] wiki corpus license documentation fix --- docs/source/wiki.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/wiki.rst b/docs/source/wiki.rst index f4f7123e..22ed6db9 100644 --- a/docs/source/wiki.rst +++ b/docs/source/wiki.rst @@ -66,10 +66,10 @@ Related links Data License ^^^^^^^^^^^^ -This dataset is governed by the `CC BY license v4.0 `_. Copyright (C) 2017-2020 The ConvoKit Developers. +This dataset is governed by the `CC BY-SA license v4.0 `_. Contact ^^^^^^^ -Please email any questions to: cristian@cs.cornell.edu (Cristian Danescu-Niculescu-Mizil) \ No newline at end of file +Please email any questions to: cristian@cs.cornell.edu (Cristian Danescu-Niculescu-Mizil) From 738f4bda4c509e9b6f265b06865094d1eb7fe853 Mon Sep 17 00:00:00 2001 From: seanzhangkx8 <106214464+seanzhangkx8@users.noreply.github.com> Date: Mon, 12 Feb 2024 20:42:21 -0500 Subject: [PATCH 2/2] run black formatter --- .../expected_context_framework/col_normed_tfidf.py | 1 - convokit/hyperconvo/hyperconvo.py | 12 ++++++------ convokit/model/corpus_helpers.py | 8 +++++--- .../politeness_api/features/vectorizer.py | 1 - .../speakerConvoDiversity/speakerConvoDiversity.py | 1 - .../speakerConvoDiversity/speakerConvoDiversity2.py | 1 - .../speaker_convo_helpers/speaker_convo_attrs.py | 1 - .../speaker_convo_helpers/speaker_convo_lifestage.py | 1 - 8 files changed, 11 insertions(+), 15 deletions(-) diff --git a/convokit/expected_context_framework/col_normed_tfidf.py b/convokit/expected_context_framework/col_normed_tfidf.py index 3aed04a8..bcdb645e 100644 --- a/convokit/expected_context_framework/col_normed_tfidf.py +++ b/convokit/expected_context_framework/col_normed_tfidf.py @@ -115,7 +115,6 @@ def dump(self, dirname): class ColNormedTfidf(TransformerMixin): - """ Model that derives tf-idf reweighted representations of utterances, which are normalized by column. Can be used in ConvoKit through the `ColNormedTfidfTransformer` transformer; see documentation of that transformer for further details. diff --git a/convokit/hyperconvo/hyperconvo.py b/convokit/hyperconvo/hyperconvo.py index 94f8c44a..25af4ca7 100644 --- a/convokit/hyperconvo/hyperconvo.py +++ b/convokit/hyperconvo/hyperconvo.py @@ -18,17 +18,17 @@ def degree_stat_funcs(nan_val): "norm.max": lambda l: np.max(l) / np.sum(l) if np.sum(l) > 0 else 0, "2nd-largest": lambda l: int(np.partition(l, -2)[-2]) if len(l) > 1 else nan_val, "2nd-argmax": lambda l: int((-l).argsort()[1]) if len(l) > 1 else nan_val, - "norm.2nd-largest": lambda l: np.partition(l, -2)[-2] / np.sum(l) - if (len(l) > 1 and np.sum(l) > 0) - else nan_val, + "norm.2nd-largest": lambda l: ( + np.partition(l, -2)[-2] / np.sum(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val + ), "mean": np.mean, "mean-nonzero": lambda l: np.mean(l[l != 0]) if len(l[l != 0]) > 0 else 0, "prop-nonzero": lambda l: np.mean(l != 0), "prop-multiple": lambda l: np.mean(l[l != 0] > 1) if len(l[l != 0] > 1) > 0 else 0, "entropy": lambda l: scipy.stats.entropy(l) if np.sum(l) > 0 else nan_val, - "2nd-largest / max": lambda l: np.partition(l, -2)[-2] / np.max(l) - if (len(l) > 1 and np.sum(l) > 0) - else nan_val, + "2nd-largest / max": lambda l: ( + np.partition(l, -2)[-2] / np.max(l) if (len(l) > 1 and np.sum(l) > 0) else nan_val + ), } diff --git a/convokit/model/corpus_helpers.py b/convokit/model/corpus_helpers.py index 62a90aee..ff680ceb 100644 --- a/convokit/model/corpus_helpers.py +++ b/convokit/model/corpus_helpers.py @@ -577,9 +577,11 @@ def dump_utterances(corpus, dir_name, exclude_vectors, fields_to_skip): KeyMeta: dump_helper_bin(ut.meta, d_bin, fields_to_skip.get("utterance", [])), KeyReplyTo: ut.reply_to, KeyTimestamp: ut.timestamp, - KeyVectors: ut.vectors - if exclude_vectors is None - else list(set(ut.vectors) - set(exclude_vectors)), + KeyVectors: ( + ut.vectors + if exclude_vectors is None + else list(set(ut.vectors) - set(exclude_vectors)) + ), } json.dump(ut_obj, f) f.write("\n") diff --git a/convokit/politeness_collections/politeness_api/features/vectorizer.py b/convokit/politeness_collections/politeness_api/features/vectorizer.py index 0d7626cf..ac6bbe5d 100644 --- a/convokit/politeness_collections/politeness_api/features/vectorizer.py +++ b/convokit/politeness_collections/politeness_api/features/vectorizer.py @@ -37,7 +37,6 @@ def get_unigrams_and_bigrams(document): class PolitenessFeatureVectorizer: - """ Returns document features based on- - unigrams and bigrams diff --git a/convokit/speakerConvoDiversity/speakerConvoDiversity.py b/convokit/speakerConvoDiversity/speakerConvoDiversity.py index fc47f1be..d5e4a8c4 100644 --- a/convokit/speakerConvoDiversity/speakerConvoDiversity.py +++ b/convokit/speakerConvoDiversity/speakerConvoDiversity.py @@ -215,7 +215,6 @@ def compute_speaker_convo_divergence( class SpeakerConvoDiversityWrapper(Transformer): - """ Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`. diff --git a/convokit/speakerConvoDiversity/speakerConvoDiversity2.py b/convokit/speakerConvoDiversity/speakerConvoDiversity2.py index cc1b077c..ffd42c1e 100644 --- a/convokit/speakerConvoDiversity/speakerConvoDiversity2.py +++ b/convokit/speakerConvoDiversity/speakerConvoDiversity2.py @@ -208,7 +208,6 @@ def _set_output(self, corpus, df): class SpeakerConvoDiversityWrapper(Transformer): - """ Implements methodology for calculating linguistic diversity per life-stage. A wrapper around `SpeakerConvoDiversity`. diff --git a/convokit/speaker_convo_helpers/speaker_convo_attrs.py b/convokit/speaker_convo_helpers/speaker_convo_attrs.py index eb46d82c..8381ddcd 100644 --- a/convokit/speaker_convo_helpers/speaker_convo_attrs.py +++ b/convokit/speaker_convo_helpers/speaker_convo_attrs.py @@ -3,7 +3,6 @@ class SpeakerConvoAttrs(Transformer): - """ Transformer that aggregates statistics per (speaker, convo). e.g., average wordcount of all utterances that speaker contributed per convo. Assumes that `corpus.organize_speaker_convo_history` has already been called. diff --git a/convokit/speaker_convo_helpers/speaker_convo_lifestage.py b/convokit/speaker_convo_helpers/speaker_convo_lifestage.py index fd43832b..eb5b5ec4 100644 --- a/convokit/speaker_convo_helpers/speaker_convo_lifestage.py +++ b/convokit/speaker_convo_helpers/speaker_convo_lifestage.py @@ -2,7 +2,6 @@ class SpeakerConvoLifestage(Transformer): - """ Transformer that, for each speaker in a conversation, computes the lifestage of the speaker in that conversation. For instance, if lifestages are 20 conversations long, then the first 20 conversations a speaker participates in will be in lifestage 0, and the second 20 will be in lifestage 1.