Source code for topik.fileio.tests.test_outputs

import os
import unittest

import elasticsearch

from topik.fileio.base_output import load_output
from topik.fileio.reader import read_input
from topik.fileio.tests import test_data_path
from topik.fileio.out_elastic import ElasticSearchOutput
from topik.fileio.out_memory import InMemoryOutput

INDEX = "topik_unittest"
SAVE_FILENAME = "test_save.topikdata"
CONTENT_FIELD = "abstract"

# make logging quiet during testing, to keep Travis CI logs short.
import logging
logging.basicConfig()
logging.getLogger('elasticsearch').setLevel(logging.ERROR)
logging.getLogger('urllib3').setLevel(logging.ERROR)


# Shared test cases; each concrete subclass provides ``test_raw_data`` in setUp.
class BaseOutputTest(object):
    test_raw_data = None

    def test_get_filtered_data(self):
        data = list(self.test_raw_data.get_filtered_data(CONTENT_FIELD))
        self.assertEqual(len(data), 100)
        self.assertFalse(data[0] == data[1])

    def test_save_file(self):
        self.test_raw_data.save(SAVE_FILENAME)
        self.assertTrue(os.path.exists(SAVE_FILENAME))
        os.remove(SAVE_FILENAME)

    def test_load_file(self):
        self.test_raw_data.save(SAVE_FILENAME)
        self.test_raw_data = load_output(SAVE_FILENAME)
        data = list(self.test_raw_data.get_filtered_data(CONTENT_FIELD))
        self.assertEqual(len(data), 100)
        os.remove(SAVE_FILENAME)

    def test_get_date_filtered_data(self):
        result_list = list(self.test_raw_data.get_date_filtered_data(
            field_to_get=CONTENT_FIELD, start=1975, end=1999,
            filter_field="year"))
        self.assertEqual(len(result_list), 25)
        # Documents are keyed by hash ids; check that a known id is present.
        self.assertTrue(-1611117933394825767 in
                        [int(item[0]) for item in result_list])


class TestInMemoryOutput(unittest.TestCase, BaseOutputTest):
    def setUp(self):
        self.test_raw_data = InMemoryOutput()
        self.test_raw_data.import_from_iterable(
            read_input('{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)


class TestElasticSearchOutput(unittest.TestCase, BaseOutputTest):
    def setUp(self):
        self.test_raw_data = ElasticSearchOutput(
            source='localhost',
            index=INDEX,
            content_field='abstract')
        self.test_raw_data.import_from_iterable(
            read_input('{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)

    def tearDown(self):
        # Drop the test index, and the derived date-alias index if it was created.
        instance = elasticsearch.Elasticsearch("localhost")
        instance.indices.delete(INDEX)
        if instance.indices.exists("{}_year_alias_date".format(INDEX)):
            instance.indices.delete("{}_year_alias_date".format(INDEX))
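

# Not part of the original listing: a minimal, conventional entry point so the
# module can also be run directly (``python test_outputs.py``) in addition to a
# test runner such as nose or pytest. Note that TestElasticSearchOutput assumes
# an Elasticsearch node is reachable on localhost.
if __name__ == '__main__':
    unittest.main()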