# Source code for topik.vectorizers.tests.test_tfidf
import nose.tools as nt
from topik.vectorizers.tfidf import tfidf
# Small tokenized corpus used as the fixture: (document id, list of tokens).
# "frank" and "dog" appear in both documents; "cat" and "llama" appear in
# exactly one document each.
sample_data = [
    ("doc1", ["frank", "frank", "frank", "dog", "cat"]),
    ("doc2", ["frank", "dog", "llama"]),
]

# Vectorized once at import time; test_vectorize reads ``output.vectors``.
output = tfidf(sample_data)
def test_vectorize():
    """Check the weights in the module-level ``output`` against a reference.

    Besides comparing each weight with ``assert_almost_equal``, the test
    verifies that the set of document ids and the set of term keys per
    document match the reference. Without those checks an empty or
    truncated ``output.vectors`` would pass vacuously, because only the
    entries actually present in the output were ever inspected.
    """
    # Expected weights keyed by document id, then by integer term key
    # (presumably the vectorizer's term ids -- confirm against tfidf).
    # 0.69314718056 is ln(2) rounded to 11 decimal places.
    reference = {
        "doc1": {1: 0.0, 2: 0.0, 3: 0.69314718056},
        "doc2": {0: 0.69314718056, 1: 0.0, 2: 0.0},
    }

    # Normalise to plain dicts using only ``.items()``, the one accessor the
    # original test relied on, so no extra mapping API is assumed.
    vectors = {
        doc_id: dict(doc.items()) for doc_id, doc in output.vectors.items()
    }

    # Every expected document must be present, and no unexpected ones.
    nt.assert_equal(set(vectors), set(reference))

    for doc_id, doc in vectors.items():
        expected = reference[doc_id]
        # Same term keys on both sides, so dropped terms are detected.
        nt.assert_equal(set(doc), set(expected))
        for word, val in doc.items():
            nt.assert_almost_equal(val, expected[word])