Skip to content

Commit

Permalink
Enabled experimental ensemble LDA support.
Browse files Browse the repository at this point in the history
  • Loading branch information
larsgrobe committed Jan 31, 2024
1 parent 1cefc82 commit 8bc42b2
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion litstudy/nlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,20 +350,30 @@ def train_elda_model(corpus: Corpus, num_topics, num_models=4, seed=0, **kwargs)
:param kwargs: Arguments passed to `gensim.models.ensemblelda.EnsembleLda` (gensim4).
"""

dic = corpus.dictionary
freqs = corpus.frequencies

from importlib.metadata import version

gensim_mayor = int(version("gensim").split(".")[0])

if gensim_mayor <= 3:
from sys import exit

exit("EnsembleLda requires at least gensim 4.")

from gensim.models.ensembelda import EnsembleLda

model = EnsembleLda(corpus=freqs, id2word=dic, num_topics=num_topics, num_models=num_models, **kwargs)
model = EnsembleLda(
corpus=freqs, id2word=dic, num_topics=num_topics, num_models=num_models, **kwargs
)

doc2topic = corpus2dense(model[freqs], num_topics).T
topic2token = model.get_topics()

return TopicModel(dic, doc2topic, topic2token)


def compute_word_distribution(corpus: Corpus, *, limit=None) -> pd.DataFrame:
"""Returns dataframe that indicates, for each word, the number of
documents that mention that word.
Expand Down

0 comments on commit 8bc42b2

Please sign in to comment.