# Source code for topik.tokenizers.tests.test_simple

from topik.tokenizers.simple import simple, _simple_document

# Shared fixture for the tests in this module: (document id, raw text) pairs.
sample_data = [
    ("doc1", "frank FRANK the frank dog cat"),
    ("doc2", "frank a dog of the llama"),
]

def test__simple_document():
    """Check ``_simple_document`` on the text of the first sample document.

    The input is ``"frank FRANK the frank dog cat"``; the expected token
    list is entirely lowercase and does not contain ``"the"``.
    """
    tokens = _simple_document(sample_data[0][1])
    # ``assert`` is a statement, not a function: no call-style parentheses.
    assert tokens == ["frank", "frank", "frank", "dog", "cat"]
def test_simple():
    """Check the first two items produced by ``simple(sample_data)``.

    Each item is expected to be a ``(document id, token list)`` tuple, in
    the same order as ``sample_data``. For "doc2" the expected tokens omit
    "a", "of" and "the" from the input text.
    """
    tokenized_corpora = simple(sample_data)
    # Items are pulled one at a time with ``next``, so the result is consumed
    # as an iterator rather than indexed.
    assert next(tokenized_corpora) == ("doc1", ["frank", "frank", "frank", "dog", "cat"])
    assert next(tokenized_corpora) == ("doc2", ["frank", "dog", "llama"])