Source code for topik.viz

from __future__ import absolute_import

import logging

from topik.tests import test_data_path

class Termite(object):
    """A Bokeh Termite Visualization for LDA results analysis.

    Parameters
    ----------
    input_file : str or pandas DataFrame
        A pandas dataframe from a topik model get_termite_data() containing
        columns "word", "topic" and "weight". May also be a string, in which
        case the string is a filename of a csv file with the above columns.
    title : str
        The title for your termite plot

    Examples
    --------
    >>> termite = Termite("{}/termite.csv".format(test_data_path),
    ...                   "My lda results")
    >>> termite.plot('my_termite.html')

    """

    def __init__(self, input_file, title):
        self.input_file = input_file
        self.title = title

    def plot(self, output_file="termite.html"):
        """Draw the termite plot, write it to an HTML file and show it.

        One circle is drawn per row of the input, positioned by its "topic"
        (x axis) and "word" (y axis). The circle size grows with the square
        root of the row's weight, rescaled so that the smallest weight maps
        to 0 and the largest to 50.

        Parameters
        ----------
        output_file : str
            Name of the HTML file handed to ``bokeh.plotting.output_file``.
        """
        # Imported inside the method, so these packages are only needed when
        # a plot is actually drawn.
        import blaze as blz
        from odo import into
        import pandas as pd
        import bokeh.plotting as plt
        from bokeh.models.sources import ColumnDataSource

        table = blz.Data(self.input_file)

        max_weight = blz.compute(table.weight.max())
        min_weight = blz.compute(table.weight.min())
        weight_span = max_weight - min_weight

        # Create a size variable to define the size of the circle for the
        # plot.
        if weight_span:
            size_expr = blz.sqrt(
                (table.weight - min_weight) / weight_span) * 50
        else:
            # Every weight is identical, so min-max scaling would divide by
            # zero. Fall back to one uniform, mid-scale size for all circles.
            size_expr = table.weight * 0 + 25
        table = blz.transform(table, size=size_expr)

        words = into(list, table['word'].distinct())

        # Convert topics to strings
        topics = [str(topic)
                  for topic in into(list, table['topic'].distinct())]

        frame = into(pd.DataFrame, table)

        plt.output_file(output_file)

        data_source = ColumnDataSource(frame)

        figure = plt.figure(x_range=topics, y_range=words,
                            plot_width=1000, plot_height=1700,
                            title=self.title)

        figure.circle(x="topic", y="word", size="size", fill_alpha=0.6,
                      source=data_source)

        plt.show(figure)
def plot_lda_vis(model_data):
    """Display a pyLDAvis visualization for the given model data.

    Designed to work with to_py_lda_vis() in the model classes.

    Parameters
    ----------
    model_data : dict
        Keyword arguments passed through unchanged to ``pyLDAvis.prepare``.
    """
    # Imported inside the function, so pyLDAvis is only needed when this
    # visualization is requested.
    import pyLDAvis

    prepared = pyLDAvis.prepare(**model_data)
    pyLDAvis.show(prepared)