# Source code for topik.vectorizers.tests.test_output

from topik.vectorizers.vectorizer_output import VectorizerOutput

# Shared fixture: (document id, token list) pairs.  Across both documents
# there are four distinct terms: frank, dog, cat and llama.
sample_data = [
    ("doc1", ["frank"] * 3 + ["dog", "cat"]),
    ("doc2", ["frank", "dog", "llama"]),
]

# Module-level instance exercised by the tests in this file.  The second
# argument is a stub callable that hands back its first argument unchanged.
# NOTE(review): presumably it stands in for a real vectorizer function --
# confirm against VectorizerOutput's signature.
output = VectorizerOutput(sample_data, lambda x, y: x)


def test_global_term_count():
    """Check that the shared ``output`` reports a global term count of 4.

    The fixture documents contain four distinct terms (frank, dog, cat,
    llama); presumably that is what ``global_term_count`` measures.
    """
    assert output.global_term_count == 4
def test_document_term_count():
    """Check the per-document term counts reported by the shared ``output``.

    Both "doc1" and "doc2" are expected to map to 3.
    """
    expected = {"doc1": 3, "doc2": 3}
    assert output.document_term_counts == expected
def test_term_frequency():
    """Check that ``output.term_frequency[1]`` is exactly an ``int``."""
    # TODO: is there a better place to put this such that it gets tested on
    # all vectorization methods?
    # Exact type comparison is deliberate: it keeps the strictness of the
    # original ``type(...) == int`` check, so int subclasses are rejected.
    assert type(output.term_frequency[1]) is int